nnetsauce
# Public API of the package: every estimator/utility re-exported here is
# also listed in ``__all__`` below.
from .attention import AttentionMechanism
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .custom.customBackpropRegressor import CustomBackPropRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
# The Lazy* and LazyDeep* names are imported once each; the former
# single-name re-imports of LazyDeepClassifier / LazyDeepRegressor were
# redundant duplicates of these two lines.
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .mts.stackedmts import MTSStacker
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .predictionset import PredictionSet
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .randomfourier.randomfourier import RandomFourierEstimator
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

# Names exported by ``from nnetsauce import *``.
__all__ = [
    "AdaBoostClassifier",
    "AttentionMechanism",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "CustomBackPropRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MTSStacker",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "PredictionSet",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RandomFourierEstimator",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls compromise between l1 and l2 norm of weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m; after a 'SAMME' fit, `alpha_[m]`
            is the coefficient of `base_learners_[m]`

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed = 123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----
    # NOTE: the `_estimator_type` property at the end of this class body
    # rebinds the same name, so instances resolve it through the property.
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        """Store the boosting hyperparameters; see the class docstring."""
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples], optional
                Initial observation weights; uniform weights 1/n_samples
                are used when omitted. The array passed in is copied and
                never modified.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        # Kept as asserts (same messages) so callers that expect
        # AssertionError on invalid settings keep working.
        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

        # training
        n, _ = X.shape
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes = len(self.classes_)
        self.n_classes_ = self.n_classes  # for compatibility with sklearn

        # A refit starts from scratch: coefficients and learners of an
        # earlier fit must not leak into this one.
        self.alpha_ = []
        self.base_learners_ = {}

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            # Float copy: SAMME.R updates the weights in place, which must
            # neither touch the caller's array nor fail on integer input.
            w_m = np.array(sample_weight, dtype=float).ravel()

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        pbar = Progbar(self.n_estimators) if self.verbose == 1 else None
        eps = np.finfo(float).eps  # == 2.220446049250313e-16

        if self.method == "SAMME":
            err_bound = 1 - 1 / self.n_classes
            y_true = np.asarray(y).ravel()
        else:  # "SAMME.R"
            Y = mo.one_hot_encode2(y, self.n_classes)

        for m in range(self.n_estimators):
            fitted = base_learner.fit(X, y, sample_weight=w_m, **kwargs)

            if self.method == "SAMME":
                miss = y_true != np.asarray(fitted.predict(X)).ravel()

                # Weighted error rate; dividing by the total weight keeps it
                # a proportion even when user weights do not sum to 1.
                err_m = max(float(np.sum(w_m[miss]) / np.sum(w_m)), eps)

                if self.reg_lambda > 0:
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * float(np.sum(w_m**2))
                        + self.reg_alpha * float(np.sum(np.abs(w_m)))
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                # alpha_[m] belongs to base_learners_[m]; predict_proba
                # indexes both with the same key.
                self.alpha_.append(alpha_m)

                # NOTE(review): the new weights depend only on the current
                # misses, not on the previous weights; textbook SAMME
                # multiplies the previous weights by this factor. Confirm
                # this is intended before changing it.
                w_m = np.exp(alpha_m * miss)
                w_m /= np.sum(w_m)
            else:
                probs = fitted.predict_proba(X)
                np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )
                w_m /= np.sum(w_m)

            self.base_learners_[m] = deepcopy(base_learner)

            # Each boosting round uses a different seed.
            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if pbar is not None:
                pbar.update(m)

        if pbar is not None:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            model predictions: {array-like}
                for each row, the column index of the largest value
                returned by `predict_proba`
        """
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                logistic-transformed ensemble scores, rescaled so that
                each row sums to 1

        """

        if self.method == "SAMME":
            # Weighted vote: each learner adds its coefficient to the
            # column of the class it predicts.
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)
                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )
        else:  # "SAMME.R"
            # Sum of row-centred log-probabilities over the learners.
            ensemble_learner = 0

            for base_learner in self.base_learners_.values():
                probs = base_learner.predict_proba(X, **kwargs)
                # Clip away zeros so the logarithm stays finite.
                np.clip(a=probs, a_min=np.finfo(float).eps, a_max=1.0, out=probs)
                log_preds_proba = np.log(probs)
                ensemble_learner += (
                    log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
                )

            ensemble_learner *= self.n_classes - 1

        expit_ensemble_learner = expit(ensemble_learner)
        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    @property
    def _estimator_type(self):
        """Estimator kind marker; always "classifier"."""
        return "classifier"
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls compromise between l1 and l2 norm of weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
n_hidden_features=int(11.22338867),
direct_link=True,
n_estimators=250, learning_rate=0.01126343,
col_sample=0.72684326, row_sample=0.86429443,
dropout=0.63078613, n_clusters=2,
type_clust="gmm",
verbose=1, seed = 123,
method="SAMME.R")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples], optional
            Initial observation weights; uniform weights 1/n_samples
            are used when omitted. The array passed in is copied and
            never modified.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    # Kept as asserts (same messages) so callers that expect
    # AssertionError on invalid settings keep working.
    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # training
    n, _ = X.shape
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes = len(self.classes_)
    self.n_classes_ = self.n_classes  # for compatibility with sklearn

    # A refit starts from scratch: coefficients and learners of an
    # earlier fit must not leak into this one.
    self.alpha_ = []
    self.base_learners_ = {}

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        # Float copy: SAMME.R updates the weights in place, which must
        # neither touch the caller's array nor fail on integer input.
        w_m = np.array(sample_weight, dtype=float).ravel()

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    pbar = Progbar(self.n_estimators) if self.verbose == 1 else None
    eps = np.finfo(float).eps  # == 2.220446049250313e-16

    if self.method == "SAMME":
        err_bound = 1 - 1 / self.n_classes
        y_true = np.asarray(y).ravel()
    else:  # "SAMME.R"
        Y = mo.one_hot_encode2(y, self.n_classes)

    for m in range(self.n_estimators):
        fitted = base_learner.fit(X, y, sample_weight=w_m, **kwargs)

        if self.method == "SAMME":
            miss = y_true != np.asarray(fitted.predict(X)).ravel()

            # Weighted error rate; dividing by the total weight keeps it
            # a proportion even when user weights do not sum to 1.
            err_m = max(float(np.sum(w_m[miss]) / np.sum(w_m)), eps)

            if self.reg_lambda > 0:
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * float(np.sum(w_m**2))
                    + self.reg_alpha * float(np.sum(np.abs(w_m)))
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            # No leading placeholder: alpha_[m] is the coefficient of
            # base_learners_[m], so both can be indexed with the same key.
            self.alpha_.append(alpha_m)

            # NOTE(review): the new weights depend only on the current
            # misses, not on the previous weights; textbook SAMME
            # multiplies the previous weights by this factor. Confirm
            # this is intended before changing it.
            w_m = np.exp(alpha_m * miss)
            w_m /= np.sum(w_m)
        else:
            probs = fitted.predict_proba(X)
            np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )
            w_m /= np.sum(w_m)

        self.base_learners_[m] = deepcopy(base_learner)

        # Each boosting round uses a different seed.
        base_learner.set_params(seed=self.seed + (m + 1) * 1000)

        if pbar is not None:
            pbar.update(m)

    if pbar is not None:
        pbar.update(self.n_estimators)

    self.n_estimators = len(self.base_learners_)

    return self
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return the most probable class index for each row of X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to classify, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded unchanged to
              self.predict_proba

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by self.predict_proba
    """
    class_probabilities = self.predict_proba(X, **kwargs)
    # Row-wise argmax: position of the highest probability.
    return class_probabilities.argmax(axis=1)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
              self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            logistic-transformed ensemble scores, rescaled so that
            each row sums to 1

    """

    if self.method == "SAMME":
        # Weighted vote: each learner adds its coefficient to the
        # column of the class it predicts.
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)
            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )
    else:  # "SAMME.R"
        # Sum of row-centred log-probabilities over the learners.
        ensemble_learner = 0

        for base_learner in self.base_learners_.values():
            probs = base_learner.predict_proba(X, **kwargs)
            # Clip away zeros so the logarithm stays finite
            # (np.finfo(float).eps == 2.220446049250313e-16).
            np.clip(a=probs, a_min=np.finfo(float).eps, a_max=1.0, out=probs)
            log_preds_proba = np.log(probs)
            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

        ensemble_learner *= self.n_classes - 1

    # Shared tail for both methods: squash, then normalise each row.
    expit_ensemble_learner = expit(ensemble_learner)
    sum_ensemble = expit_ensemble_learner.sum(axis=1)

    return expit_ensemble_learner / sum_ensemble[:, None]
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
10class AttentionMechanism: 11 """ 12 A comprehensive class implementing various attention mechanisms 13 for both univariate time series and tabular data using JAX. 14 15 Supported attention types: 16 - Scaled Dot-Product Attention 17 - Additive (Bahdanau) Attention 18 - Multi-Head Attention 19 - Self-Attention 20 - Temporal Attention (for sequences) 21 - Feature Attention (for tabular data) 22 - Cross-Attention 23 - Context Vector Attention 24 """ 25 26 def __init__( 27 self, 28 input_dim: int, 29 hidden_dim: int = 64, 30 num_heads: int = 4, 31 dropout: float = 0.1, 32 seed: int = 42, 33 ): 34 """ 35 Args: 36 input_dim: Dimension of input features 37 hidden_dim: Hidden dimension for attention computations 38 num_heads: Number of attention heads for multi-head attention 39 dropout: Dropout rate 40 seed: Random seed for parameter initialization 41 """ 42 self.input_dim = input_dim 43 self.hidden_dim = hidden_dim 44 self.num_heads = num_heads 45 self.dropout = dropout 46 47 # Initialize random key 48 self.rng = random.PRNGKey(seed) 49 50 # Initialize parameters 51 self.params = self._initialize_parameters() 52 53 assert ( 54 hidden_dim % num_heads == 0 55 ), "hidden_dim must be divisible by num_heads" 56 self.head_dim = hidden_dim // num_heads 57 58 def _initialize_parameters(self) -> Dict: 59 """Initialize all network parameters using JAX""" 60 keys = random.split(self.rng, 20) 61 62 def init_weight(key, shape): 63 return random.normal(key, shape) * np.sqrt(2.0 / shape[0]) 64 65 def init_bias(shape): 66 return jnp.zeros(shape) 67 68 params = { 69 # Scaled Dot-Product Attention 70 "query_w": init_weight(keys[0], (self.input_dim, self.hidden_dim)), 71 "query_b": init_bias((self.hidden_dim,)), 72 "key_w": init_weight(keys[1], (self.input_dim, self.hidden_dim)), 73 "key_b": init_bias((self.hidden_dim,)), 74 "value_w": init_weight(keys[2], (self.input_dim, self.hidden_dim)), 75 "value_b": init_bias((self.hidden_dim,)), 76 # Additive Attention 77 "additive_query_w": 
init_weight( 78 keys[3], (self.input_dim, self.hidden_dim) 79 ), 80 "additive_query_b": init_bias((self.hidden_dim,)), 81 "additive_key_w": init_weight( 82 keys[4], (self.input_dim, self.hidden_dim) 83 ), 84 "additive_key_b": init_bias((self.hidden_dim,)), 85 "additive_v_w": init_weight(keys[5], (self.hidden_dim, 1)), 86 "additive_v_b": init_bias((1,)), 87 # Multi-Head Attention 88 "mha_query_w": init_weight( 89 keys[6], (self.input_dim, self.hidden_dim) 90 ), 91 "mha_query_b": init_bias((self.hidden_dim,)), 92 "mha_key_w": init_weight( 93 keys[7], (self.input_dim, self.hidden_dim) 94 ), 95 "mha_key_b": init_bias((self.hidden_dim,)), 96 "mha_value_w": init_weight( 97 keys[8], (self.input_dim, self.hidden_dim) 98 ), 99 "mha_value_b": init_bias((self.hidden_dim,)), 100 "mha_output_w": init_weight( 101 keys[9], (self.hidden_dim, self.hidden_dim) 102 ), 103 "mha_output_b": init_bias((self.hidden_dim,)), 104 # Feature Attention 105 "feature_w1": init_weight( 106 keys[10], (self.input_dim, self.hidden_dim) 107 ), 108 "feature_b1": init_bias((self.hidden_dim,)), 109 "feature_w2": init_weight( 110 keys[11], (self.hidden_dim, self.input_dim) 111 ), 112 "feature_b2": init_bias((self.input_dim,)), 113 # Temporal Attention 114 "temporal_query_w": init_weight( 115 keys[12], (self.input_dim, self.hidden_dim) 116 ), 117 "temporal_query_b": init_bias((self.hidden_dim,)), 118 "temporal_key_w": init_weight( 119 keys[13], (self.input_dim, self.hidden_dim) 120 ), 121 "temporal_key_b": init_bias((self.hidden_dim,)), 122 # Context Vector Attention 123 "context_vector": random.normal(keys[14], (1, 1, self.hidden_dim)), 124 "context_query_w": init_weight( 125 keys[15], (self.hidden_dim, self.hidden_dim) 126 ), 127 "context_query_b": init_bias((self.hidden_dim,)), 128 "context_key_w": init_weight( 129 keys[16], (self.input_dim, self.hidden_dim) 130 ), 131 "context_key_b": init_bias((self.hidden_dim,)), 132 "context_value_w": init_weight( 133 keys[17], (self.input_dim, self.hidden_dim) 134 
), 135 "context_value_b": init_bias((self.hidden_dim,)), 136 } 137 138 return params 139 140 @staticmethod 141 @jit 142 def _apply_dropout( 143 x: jnp.ndarray, 144 key: jax.random.PRNGKey, 145 rate: float, 146 training: bool = True, 147 ) -> jnp.ndarray: 148 """Apply dropout""" 149 if training and rate > 0: 150 keep_prob = 1 - rate 151 mask = random.bernoulli(key, keep_prob, x.shape) 152 return jnp.where(mask, x / keep_prob, 0) 153 return x 154 155 @partial(jit, static_argnums=(0,)) 156 def scaled_dot_product_attention( 157 self, 158 query: jnp.ndarray, 159 key: jnp.ndarray, 160 value: jnp.ndarray, 161 params: Dict, 162 mask: Optional[jnp.ndarray] = None, 163 training: bool = False, 164 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 165 """ 166 Scaled Dot-Product Attention 167 168 Args: 169 query: (batch_size, seq_len, input_dim) or (batch_size, input_dim) 170 key: (batch_size, seq_len, input_dim) 171 value: (batch_size, seq_len, input_dim) 172 params: Parameter dictionary 173 mask: Optional mask (batch_size, seq_len) 174 training: Whether in training mode 175 176 Returns: 177 context: Attended context vector 178 attention_weights: Attention weights 179 """ 180 # Project inputs 181 Q = jnp.dot(query, params["query_w"]) + params["query_b"] 182 K = jnp.dot(key, params["key_w"]) + params["key_b"] 183 V = jnp.dot(value, params["value_w"]) + params["value_b"] 184 185 # Compute attention scores 186 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) 187 scores = scores / jnp.sqrt(self.hidden_dim) 188 189 # Apply mask if provided 190 if mask is not None: 191 scores = jnp.where(mask == 0, -1e9, scores) 192 193 # Compute attention weights 194 attention_weights = jax.nn.softmax(scores, axis=-1) 195 196 # Apply attention to values 197 context = jnp.matmul(attention_weights, V) 198 199 return context, attention_weights 200 201 @partial(jit, static_argnums=(0,)) 202 def additive_attention( 203 self, 204 query: jnp.ndarray, 205 key: jnp.ndarray, 206 value: jnp.ndarray, 207 params: Dict, 208 
mask: Optional[jnp.ndarray] = None, 209 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 210 """ 211 Additive (Bahdanau) Attention 212 213 Args: 214 query: (batch_size, hidden_dim) or (batch_size, 1, hidden_dim) 215 key: (batch_size, seq_len, hidden_dim) 216 value: (batch_size, seq_len, hidden_dim) 217 params: Parameter dictionary 218 mask: Optional mask 219 220 Returns: 221 context: Attended context vector 222 attention_weights: Attention weights 223 """ 224 # Ensure query has seq_len dimension 225 if query.ndim == 2: 226 query = jnp.expand_dims(query, axis=1) 227 228 # Project query and key 229 Q = ( 230 jnp.dot(query, params["additive_query_w"]) 231 + params["additive_query_b"] 232 ) 233 K = jnp.dot(key, params["additive_key_w"]) + params["additive_key_b"] 234 235 # Additive attention: score = v^T tanh(W_q Q + W_k K) 236 combined = jnp.tanh(Q + K) 237 scores = ( 238 jnp.dot(combined, params["additive_v_w"]) + params["additive_v_b"] 239 ) 240 scores = jnp.squeeze(scores, axis=-1) 241 242 # Apply mask if provided 243 if mask is not None: 244 scores = jnp.where(mask == 0, -1e9, scores) 245 246 # Compute attention weights 247 attention_weights = jax.nn.softmax(scores, axis=-1) 248 249 # Apply attention to values 250 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), value) 251 context = jnp.squeeze(context, axis=1) 252 253 return context, attention_weights 254 255 @partial(jit, static_argnums=(0,)) 256 def multi_head_attention( 257 self, 258 query: jnp.ndarray, 259 key: jnp.ndarray, 260 value: jnp.ndarray, 261 params: Dict, 262 mask: Optional[jnp.ndarray] = None, 263 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 264 """ 265 Multi-Head Attention 266 267 Args: 268 query: (batch_size, seq_len_q, input_dim) 269 key: (batch_size, seq_len_k, input_dim) 270 value: (batch_size, seq_len_v, input_dim) 271 params: Parameter dictionary 272 mask: Optional mask 273 274 Returns: 275 output: Multi-head attention output 276 attention_weights: Attention weights from all heads 277 """ 278 
batch_size = query.shape[0] 279 280 # Project and reshape for multi-head attention 281 Q = jnp.dot(query, params["mha_query_w"]) + params["mha_query_b"] 282 K = jnp.dot(key, params["mha_key_w"]) + params["mha_key_b"] 283 V = jnp.dot(value, params["mha_value_w"]) + params["mha_value_b"] 284 285 Q = Q.reshape(batch_size, -1, self.num_heads, self.head_dim) 286 K = K.reshape(batch_size, -1, self.num_heads, self.head_dim) 287 V = V.reshape(batch_size, -1, self.num_heads, self.head_dim) 288 289 # Transpose for attention: (batch, num_heads, seq_len, head_dim) 290 Q = jnp.transpose(Q, (0, 2, 1, 3)) 291 K = jnp.transpose(K, (0, 2, 1, 3)) 292 V = jnp.transpose(V, (0, 2, 1, 3)) 293 294 # Compute attention scores 295 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 296 self.head_dim 297 ) 298 299 # Apply mask if provided 300 if mask is not None: 301 mask_expanded = jnp.expand_dims(jnp.expand_dims(mask, 1), 2) 302 scores = jnp.where(mask_expanded == 0, -1e9, scores) 303 304 # Attention weights 305 attention_weights = jax.nn.softmax(scores, axis=-1) 306 307 # Apply attention to values 308 context = jnp.matmul(attention_weights, V) 309 310 # Reshape back: (batch, seq_len, hidden_dim) 311 context = jnp.transpose(context, (0, 2, 1, 3)) 312 context = context.reshape(batch_size, -1, self.hidden_dim) 313 314 # Final linear projection 315 output = ( 316 jnp.dot(context, params["mha_output_w"]) + params["mha_output_b"] 317 ) 318 319 return output, attention_weights 320 321 @partial(jit, static_argnums=(0,)) 322 def self_attention( 323 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 324 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 325 """Self-Attention mechanism""" 326 return self.scaled_dot_product_attention(x, x, x, params, mask) 327 328 @partial(jit, static_argnums=(0,)) 329 def temporal_attention( 330 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 331 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 332 """ 333 Temporal Attention for time 
series data 334 335 Args: 336 x: (batch_size, seq_len, input_dim) 337 params: Parameter dictionary 338 mask: Optional mask 339 340 Returns: 341 context: Temporally attended context 342 attention_weights: Temporal attention weights 343 """ 344 # Use last time step as query 345 query = x[:, -1:, :] 346 347 Q = ( 348 jnp.dot(query, params["temporal_query_w"]) 349 + params["temporal_query_b"] 350 ) 351 K = jnp.dot(x, params["temporal_key_w"]) + params["temporal_key_b"] 352 353 # Compute attention scores 354 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 355 self.hidden_dim 356 ) 357 scores = jnp.squeeze(scores, axis=1) 358 359 # Apply mask if provided 360 if mask is not None: 361 scores = jnp.where(mask == 0, -1e9, scores) 362 363 # Attention weights 364 attention_weights = jax.nn.softmax(scores, axis=-1) 365 366 # Apply attention 367 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), x) 368 context = jnp.squeeze(context, axis=1) 369 370 return context, attention_weights 371 372 @partial(jit, static_argnums=(0,)) 373 def feature_attention_tabular( 374 self, x: jnp.ndarray, params: Dict 375 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 376 """ 377 Feature Attention for tabular data 378 379 Args: 380 x: (batch_size, num_features) 381 params: Parameter dictionary 382 383 Returns: 384 output: Feature-weighted output 385 attention_weights: Feature importance weights 386 """ 387 # Compute feature attention weights 388 hidden = jnp.dot(x, params["feature_w1"]) + params["feature_b1"] 389 hidden = jnp.tanh(hidden) 390 logits = jnp.dot(hidden, params["feature_w2"]) + params["feature_b2"] 391 attention_weights = jax.nn.softmax(logits, axis=-1) 392 393 # Apply attention to features 394 output = x * attention_weights 395 396 return output, attention_weights 397 398 @partial(jit, static_argnums=(0,)) 399 def context_vector_attention( 400 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 401 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 402 """ 403 
Context Vector Attention 404 Uses a learnable global context vector as the query. 405 406 Args: 407 x: (batch_size, seq_len, input_dim) 408 params: Parameter dictionary 409 mask: Optional mask (batch_size, seq_len) 410 411 Returns: 412 context: Global context representation (batch_size, hidden_dim) 413 attention_weights: Attention weights (batch_size, seq_len) 414 """ 415 batch_size = x.shape[0] 416 417 # Expand context vector for batch 418 context_vec = jnp.broadcast_to( 419 params["context_vector"], (batch_size, 1, self.hidden_dim) 420 ) 421 422 # Project context vector and input 423 Q = ( 424 jnp.dot(context_vec, params["context_query_w"]) 425 + params["context_query_b"] 426 ) 427 K = jnp.dot(x, params["context_key_w"]) + params["context_key_b"] 428 V = jnp.dot(x, params["context_value_w"]) + params["context_value_b"] 429 430 # Compute attention scores 431 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 432 self.hidden_dim 433 ) 434 scores = jnp.squeeze(scores, axis=1) 435 436 # Apply mask if provided 437 if mask is not None: 438 scores = jnp.where(mask == 0, -1e9, scores) 439 440 # Compute attention weights 441 attention_weights = jax.nn.softmax(scores, axis=-1) 442 443 # Apply attention to values 444 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), V) 445 context = jnp.squeeze(context, axis=1) 446 447 return context, attention_weights 448 449 @partial(jit, static_argnums=(0,)) 450 def cross_attention( 451 self, 452 query: jnp.ndarray, 453 key_value: jnp.ndarray, 454 params: Dict, 455 mask: Optional[jnp.ndarray] = None, 456 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 457 """Cross-Attention between two different sequences""" 458 return self.scaled_dot_product_attention( 459 query, key_value, key_value, params, mask 460 ) 461 462 def __call__( 463 self, 464 x: jnp.ndarray, 465 attention_type: str = "scaled_dot_product", 466 query: Optional[jnp.ndarray] = None, 467 key_value: Optional[jnp.ndarray] = None, 468 mask: Optional[jnp.ndarray] = 
None, 469 training: bool = False, 470 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 471 """ 472 Forward pass with specified attention mechanism 473 474 Args: 475 x: Input tensor 476 attention_type: Type of attention to use 477 query: Optional query for cross-attention 478 key_value: Optional key-value for cross-attention 479 mask: Optional mask 480 training: Whether in training mode 481 482 Returns: 483 output: Attention output 484 attention_weights: Attention weights 485 """ 486 if attention_type == "scaled_dot_product": 487 return self.scaled_dot_product_attention( 488 x, x, x, self.params, mask, training 489 ) 490 elif attention_type == "additive": 491 return self.additive_attention( 492 x[:, -1:, :], x, x, self.params, mask 493 ) 494 elif attention_type == "multi_head": 495 return self.multi_head_attention(x, x, x, self.params, mask) 496 elif attention_type == "self": 497 return self.self_attention(x, self.params, mask) 498 elif attention_type == "temporal": 499 return self.temporal_attention(x, self.params, mask) 500 elif attention_type == "feature": 501 return self.feature_attention_tabular(x, self.params) 502 elif attention_type == "cross": 503 if query is None or key_value is None: 504 raise ValueError( 505 "Cross-attention requires both query and key_value" 506 ) 507 return self.cross_attention(query, key_value, self.params, mask) 508 elif attention_type == "context_vector": 509 return self.context_vector_attention(x, self.params, mask) 510 else: 511 raise ValueError(f"Unknown attention type: {attention_type}")
A comprehensive class implementing various attention mechanisms for both univariate time series and tabular data using JAX.
Supported attention types:
- Scaled Dot-Product Attention
- Additive (Bahdanau) Attention
- Multi-Head Attention
- Self-Attention
- Temporal Attention (for sequences)
- Feature Attention (for tabular data)
- Cross-Attention
- Context Vector Attention
48class Base(BaseEstimator): 49 """Base model from which all the other classes inherit. 50 51 This class contains the most important data preprocessing/feature engineering methods. 52 53 Parameters: 54 55 n_hidden_features: int 56 number of nodes in the hidden layer 57 58 activation_name: str 59 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 60 61 a: float 62 hyperparameter for 'prelu' or 'elu' activation function 63 64 nodes_sim: str 65 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 66 'uniform' 67 68 bias: boolean 69 indicates if the hidden layer contains a bias term (True) or 70 not (False) 71 72 dropout: float 73 regularization parameter; (random) percentage of nodes dropped out 74 of the training 75 76 direct_link: boolean 77 indicates if the original features are included (True) in model's 78 fitting or not (False) 79 80 n_clusters: int 81 number of clusters for type_clust='kmeans' or type_clust='gmm' 82 clustering (could be 0: no clustering) 83 84 cluster_encode: bool 85 defines how the variable containing clusters is treated (default is one-hot); 86 if `False`, then labels are used, without one-hot encoding 87 88 type_clust: str 89 type of clustering method: currently k-means ('kmeans') or Gaussian 90 Mixture Model ('gmm') 91 92 type_scaling: a tuple of 3 strings 93 scaling methods for inputs, hidden layer, and clustering respectively 94 (and when relevant). 
95 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 96 97 col_sample: float 98 percentage of features randomly chosen for training 99 100 row_sample: float 101 percentage of rows chosen for training, by stratified bootstrapping 102 103 seed: int 104 reproducibility seed for nodes_sim=='uniform', clustering and dropout 105 106 backend: str 107 "cpu" or "gpu" or "tpu" 108 109 """ 110 111 # construct the object ----- 112 113 def __init__( 114 self, 115 n_hidden_features=5, 116 activation_name="relu", 117 a=0.01, 118 nodes_sim="sobol", 119 bias=True, 120 dropout=0, 121 direct_link=True, 122 n_clusters=2, 123 cluster_encode=True, 124 type_clust="kmeans", 125 type_scaling=("std", "std", "std"), 126 col_sample=1, 127 row_sample=1, 128 seed=123, 129 backend="cpu", 130 ): 131 # input checks ----- 132 133 sys_platform = platform.system() 134 135 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 136 warnings.warn( 137 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 138 ) 139 backend = "cpu" 140 141 assert activation_name in ( 142 "relu", 143 "tanh", 144 "sigmoid", 145 "prelu", 146 "elu", 147 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 148 149 assert nodes_sim in ( 150 "sobol", 151 "hammersley", 152 "uniform", 153 "halton", 154 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 155 156 assert type_clust in ( 157 "kmeans", 158 "gmm", 159 ), "'type_clust' must be in ('kmeans', 'gmm')" 160 161 assert (len(type_scaling) == 3) & all( 162 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 163 for i in range(len(type_scaling)) 164 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')" 165 166 assert (col_sample >= 0) & ( 167 col_sample <= 1 168 ), "'col_sample' must be comprised between 0 and 1 
(both included)" 169 170 assert backend in ( 171 "cpu", 172 "gpu", 173 "tpu", 174 ), "must have 'backend' in ('cpu', 'gpu', 'tpu')" 175 176 self.n_hidden_features = n_hidden_features 177 self.activation_name = activation_name 178 self.a = a 179 self.nodes_sim = nodes_sim 180 self.bias = bias 181 self.seed = seed 182 self.backend = backend 183 self.dropout = dropout 184 self.direct_link = direct_link 185 self.cluster_encode = cluster_encode 186 self.type_clust = type_clust 187 self.type_scaling = type_scaling 188 self.col_sample = col_sample 189 self.row_sample = row_sample 190 self.n_clusters = n_clusters 191 if isinstance(self, RegressorMixin): 192 self.type_fit = "regression" 193 elif isinstance(self, ClassifierMixin): 194 self.type_fit = "classification" 195 self.subsampler_ = None 196 self.index_col_ = None 197 self.index_row_ = True 198 self.clustering_obj_ = None 199 self.clustering_scaler_ = None 200 self.nn_scaler_ = None 201 self.scaler_ = None 202 self.encoder_ = None 203 self.W_ = None 204 self.X_ = None 205 self.y_ = None 206 self.y_mean_ = None 207 self.beta_ = None 208 209 # activation function ----- 210 if sys_platform in ("Linux", "Darwin"): 211 activation_options = { 212 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 213 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 214 "sigmoid": ( 215 ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid 216 ), 217 "prelu": partial(ac.prelu, a=a), 218 "elu": ( 219 partial(ac.elu, a=a) 220 if (self.backend == "cpu") 221 else partial(jnn.elu, a=a) 222 ), 223 } 224 else: # on Windows currently, no JAX 225 activation_options = { 226 "relu": ( 227 ac.relu if (self.backend == "cpu") else NotImplementedError 228 ), 229 "tanh": ( 230 np.tanh if (self.backend == "cpu") else NotImplementedError 231 ), 232 "sigmoid": ( 233 ac.sigmoid 234 if (self.backend == "cpu") 235 else NotImplementedError 236 ), 237 "prelu": partial(ac.prelu, a=a), 238 "elu": ( 239 partial(ac.elu, a=a) 240 if (self.backend == 
"cpu") 241 else NotImplementedError 242 ), 243 } 244 self.activation_func = activation_options[activation_name] 245 246 # "preprocessing" methods to be inherited ----- 247 248 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 249 """Create new covariates with kmeans or GMM clustering 250 251 Parameters: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 256 257 predict: boolean 258 is False on training set and True on test set 259 260 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 261 if scaler has already been fitted on training data (online training), it can be passed here 262 263 **kwargs: 264 additional parameters to be passed to the 265 clustering method 266 267 Returns: 268 269 Clusters' matrix, one-hot encoded: {array-like} 270 271 """ 272 273 np.random.seed(self.seed) 274 275 if X is None: 276 X = self.X_ 277 278 if isinstance(X, pd.DataFrame): 279 X = copy.deepcopy(X.values.astype(float)) 280 281 if len(X.shape) == 1: 282 X = X.reshape(1, -1) 283 284 if predict is False: # encode training set 285 # scale input data before clustering 286 self.clustering_scaler_, scaled_X = mo.scale_covariates( 287 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 288 ) 289 290 self.clustering_obj_, X_clustered = mo.cluster_covariates( 291 scaled_X, 292 self.n_clusters, 293 self.seed, 294 type_clust=self.type_clust, 295 **kwargs 296 ) 297 298 if self.cluster_encode: 299 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 300 np.float16 301 ) 302 303 return X_clustered.astype(np.float16) 304 305 # if predict == True, encode test set 306 X_clustered = self.clustering_obj_.predict( 307 self.clustering_scaler_.transform(X) 308 ) 309 310 if self.cluster_encode == True: 311 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 312 np.float16 313 ) 314 315 return 
X_clustered.astype(np.float16) 316 317 def create_layer(self, scaled_X, W=None): 318 """Create hidden layer. 319 320 Parameters: 321 322 scaled_X: {array-like}, shape = [n_samples, n_features] 323 Training vectors, where n_samples is the number 324 of samples and n_features is the number of features 325 326 W: {array-like}, shape = [n_features, hidden_features] 327 if provided, constructs the hidden layer with W; otherwise computed internally 328 329 Returns: 330 331 Hidden layer matrix: {array-like} 332 333 """ 334 335 n_features = scaled_X.shape[1] 336 337 # hash_sim = { 338 # "sobol": generate_sobol, 339 # "hammersley": generate_hammersley, 340 # "uniform": generate_uniform, 341 # "halton": generate_halton 342 # } 343 344 if self.bias is False: # no bias term in the hidden layer 345 if W is None: 346 if self.nodes_sim == "sobol": 347 self.W_ = generate_sobol( 348 n_dims=n_features, 349 n_points=self.n_hidden_features, 350 seed=self.seed, 351 ) 352 elif self.nodes_sim == "hammersley": 353 self.W_ = generate_hammersley( 354 n_dims=n_features, 355 n_points=self.n_hidden_features, 356 seed=self.seed, 357 ) 358 elif self.nodes_sim == "uniform": 359 self.W_ = generate_uniform( 360 n_dims=n_features, 361 n_points=self.n_hidden_features, 362 seed=self.seed, 363 ) 364 else: 365 self.W_ = generate_halton( 366 n_dims=n_features, 367 n_points=self.n_hidden_features, 368 seed=self.seed, 369 ) 370 371 assert ( 372 scaled_X.shape[1] == self.W_.shape[0] 373 ), "check dimensions of covariates X and matrix W" 374 375 return mo.dropout( 376 x=self.activation_func( 377 mo.safe_sparse_dot( 378 a=scaled_X, b=self.W_, backend=self.backend 379 ) 380 ), 381 drop_prob=self.dropout, 382 seed=self.seed, 383 ) 384 385 # W is not none 386 assert ( 387 scaled_X.shape[1] == W.shape[0] 388 ), "check dimensions of covariates X and matrix W" 389 390 # self.W_ = W 391 return mo.dropout( 392 x=self.activation_func( 393 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 394 ), 395 
drop_prob=self.dropout, 396 seed=self.seed, 397 ) 398 399 # with bias term in the hidden layer 400 if W is None: 401 n_features_1 = n_features + 1 402 403 if self.nodes_sim == "sobol": 404 self.W_ = generate_sobol( 405 n_dims=n_features_1, 406 n_points=self.n_hidden_features, 407 seed=self.seed, 408 ) 409 elif self.nodes_sim == "hammersley": 410 self.W_ = generate_hammersley( 411 n_dims=n_features_1, 412 n_points=self.n_hidden_features, 413 seed=self.seed, 414 ) 415 elif self.nodes_sim == "uniform": 416 self.W_ = generate_uniform( 417 n_dims=n_features_1, 418 n_points=self.n_hidden_features, 419 seed=self.seed, 420 ) 421 else: 422 self.W_ = generate_halton( 423 n_dims=n_features_1, 424 n_points=self.n_hidden_features, 425 seed=self.seed, 426 ) 427 428 # self.W_ = hash_sim[self.nodes_sim]( 429 # n_dims=n_features_1, 430 # n_points=self.n_hidden_features, 431 # seed=self.seed, 432 # ) 433 434 return mo.dropout( 435 x=self.activation_func( 436 mo.safe_sparse_dot( 437 a=mo.cbind( 438 np.ones(scaled_X.shape[0]), 439 scaled_X, 440 backend=self.backend, 441 ), 442 b=self.W_, 443 backend=self.backend, 444 ) 445 ), 446 drop_prob=self.dropout, 447 seed=self.seed, 448 ) 449 450 # W is not None 451 # self.W_ = W 452 return mo.dropout( 453 x=self.activation_func( 454 mo.safe_sparse_dot( 455 a=mo.cbind( 456 np.ones(scaled_X.shape[0]), 457 scaled_X, 458 backend=self.backend, 459 ), 460 b=W, 461 backend=self.backend, 462 ) 463 ), 464 drop_prob=self.dropout, 465 seed=self.seed, 466 ) 467 468 def _jax_create_layer( 469 self, scaled_X: jnp.ndarray, W: Optional[jnp.ndarray] = None 470 ) -> jnp.ndarray: 471 """JAX-compatible version of create_layer that exactly matches the original functionality.""" 472 key = jax.random.PRNGKey(self.seed) 473 n_features = scaled_X.shape[1] 474 475 # Generate weights if not provided 476 if W is None: 477 if self.bias: 478 n_features_1 = n_features + 1 479 shape = (n_features_1, self.n_hidden_features) 480 else: 481 shape = (n_features, 
self.n_hidden_features) 482 483 # JAX-compatible weight generation matching original behavior 484 if self.nodes_sim == "sobol": 485 W_np = generate_sobol( 486 n_dims=n_features_1, 487 n_points=self.n_hidden_features, 488 seed=self.seed, 489 ) 490 W = jnp.asarray(W_np) 491 elif self.nodes_sim == "hammersley": 492 W_np = generate_hammersley( 493 n_dims=n_features_1, 494 n_points=self.n_hidden_features, 495 seed=self.seed, 496 ) 497 W = jnp.asarray(W_np) 498 elif self.nodes_sim == "uniform": 499 key, subkey = jax.random.split(key) 500 W = jax.random.uniform( 501 subkey, shape=shape, minval=-1.0, maxval=1.0 502 ) 503 else: # halton 504 W_np = generate_halton( 505 n_dims=n_features_1, 506 n_points=self.n_hidden_features, 507 seed=self.seed, 508 ) 509 W = jnp.asarray(W_np) 510 511 self.W_ = np.array(W) # Store as numpy for original methods 512 513 # Prepare input with bias if needed 514 if self.bias: 515 X_with_bias = jnp.hstack( 516 [jnp.ones((scaled_X.shape[0], 1)), scaled_X] 517 ) 518 print("X_with_bias shape:", X_with_bias.shape) 519 print("W shape:", W.shape) 520 linear_output = jnp.dot(X_with_bias, W) 521 else: 522 linear_output = jnp.dot(scaled_X, W) 523 524 # Apply activation function 525 if self.activation_name == "relu": 526 activated = jax.nn.relu(linear_output) 527 elif self.activation_name == "tanh": 528 activated = jnp.tanh(linear_output) 529 elif self.activation_name == "sigmoid": 530 activated = jax.nn.sigmoid(linear_output) 531 else: # leaky relu 532 activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 533 534 # Apply dropout 535 if self.dropout > 0: 536 key, subkey = jax.random.split(key) 537 mask = jax.random.bernoulli( 538 subkey, p=1 - self.dropout, shape=activated.shape 539 ) 540 activated = jnp.where(mask, activated / (1 - self.dropout), 0) 541 542 return activated 543 544 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 545 """Create new hidden features for training set, with hidden layer, center the response. 
546 547 Parameters: 548 549 y: array-like, shape = [n_samples] 550 Target values 551 552 X: {array-like}, shape = [n_samples, n_features] 553 Training vectors, where n_samples is the number 554 of samples and n_features is the number of features 555 556 W: {array-like}, shape = [n_features, hidden_features] 557 if provided, constructs the hidden layer via W 558 559 Returns: 560 561 (centered response, direct link + hidden layer matrix): {tuple} 562 563 """ 564 565 # either X and y are stored or not 566 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 567 if self.n_hidden_features > 0: # has a hidden layer 568 assert ( 569 len(self.type_scaling) >= 2 570 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 571 572 if X is None: 573 if self.col_sample == 1: 574 input_X = self.X_ 575 else: 576 n_features = self.X_.shape[1] 577 new_n_features = int(np.ceil(n_features * self.col_sample)) 578 assert ( 579 new_n_features >= 1 580 ), "check class attribute 'col_sample' and the number of covariates provided for X" 581 np.random.seed(self.seed) 582 index_col = np.random.choice( 583 range(n_features), size=new_n_features, replace=False 584 ) 585 self.index_col_ = index_col 586 input_X = self.X_[:, self.index_col_] 587 588 else: # X is not None # keep X vs self.X_ 589 if isinstance(X, pd.DataFrame): 590 X = copy.deepcopy(X.values.astype(float)) 591 592 if self.col_sample == 1: 593 input_X = X 594 else: 595 n_features = X.shape[1] 596 new_n_features = int(np.ceil(n_features * self.col_sample)) 597 assert ( 598 new_n_features >= 1 599 ), "check class attribute 'col_sample' and the number of covariates provided for X" 600 np.random.seed(self.seed) 601 index_col = np.random.choice( 602 range(n_features), size=new_n_features, replace=False 603 ) 604 self.index_col_ = index_col 605 input_X = X[:, self.index_col_] 606 607 if self.n_clusters <= 0: 608 # data without any clustering: self.n_clusters is None ----- 609 610 if 
self.n_hidden_features > 0: # with hidden layer 611 self.nn_scaler_, scaled_X = mo.scale_covariates( 612 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 613 ) 614 Phi_X = ( 615 self.create_layer(scaled_X) 616 if W is None 617 else self.create_layer(scaled_X, W=W) 618 ) 619 Z = ( 620 mo.cbind(input_X, Phi_X, backend=self.backend) 621 if self.direct_link is True 622 else Phi_X 623 ) 624 self.scaler_, scaled_Z = mo.scale_covariates( 625 Z, choice=self.type_scaling[0], scaler=self.scaler_ 626 ) 627 else: # no hidden layer 628 Z = input_X 629 self.scaler_, scaled_Z = mo.scale_covariates( 630 Z, choice=self.type_scaling[0], scaler=self.scaler_ 631 ) 632 633 else: 634 # data with clustering: self.n_clusters is not None ----- # keep 635 636 augmented_X = mo.cbind( 637 input_X, 638 self.encode_clusters(input_X, **kwargs), 639 backend=self.backend, 640 ) 641 642 if self.n_hidden_features > 0: # with hidden layer 643 self.nn_scaler_, scaled_X = mo.scale_covariates( 644 augmented_X, 645 choice=self.type_scaling[1], 646 scaler=self.nn_scaler_, 647 ) 648 Phi_X = ( 649 self.create_layer(scaled_X) 650 if W is None 651 else self.create_layer(scaled_X, W=W) 652 ) 653 Z = ( 654 mo.cbind(augmented_X, Phi_X, backend=self.backend) 655 if self.direct_link is True 656 else Phi_X 657 ) 658 self.scaler_, scaled_Z = mo.scale_covariates( 659 Z, choice=self.type_scaling[0], scaler=self.scaler_ 660 ) 661 else: # no hidden layer 662 Z = augmented_X 663 self.scaler_, scaled_Z = mo.scale_covariates( 664 Z, choice=self.type_scaling[0], scaler=self.scaler_ 665 ) 666 667 # Returning model inputs ----- 668 if mx.is_factor(y) is False: # regression 669 # center y 670 if y is None: 671 self.y_mean_, centered_y = mo.center_response(self.y_) 672 else: 673 self.y_mean_, centered_y = mo.center_response(y) 674 675 # y is subsampled 676 if self.row_sample < 1: 677 n, p = Z.shape 678 679 self.subsampler_ = ( 680 SubSampler( 681 y=self.y_, row_sample=self.row_sample, seed=self.seed 682 ) 683 if y 
is None 684 else SubSampler( 685 y=y, row_sample=self.row_sample, seed=self.seed 686 ) 687 ) 688 689 self.index_row_ = self.subsampler_.subsample() 690 691 n_row_sample = len(self.index_row_) 692 # regression 693 return ( 694 centered_y[self.index_row_].reshape(n_row_sample), 695 self.scaler_.transform( 696 Z[self.index_row_, :].reshape(n_row_sample, p) 697 ), 698 ) 699 # y is not subsampled 700 # regression 701 return (centered_y, self.scaler_.transform(Z)) 702 703 # classification 704 # y is subsampled 705 if self.row_sample < 1: 706 n, p = Z.shape 707 708 self.subsampler_ = ( 709 SubSampler( 710 y=self.y_, row_sample=self.row_sample, seed=self.seed 711 ) 712 if y is None 713 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 714 ) 715 716 self.index_row_ = self.subsampler_.subsample() 717 718 n_row_sample = len(self.index_row_) 719 # classification 720 return ( 721 y[self.index_row_].reshape(n_row_sample), 722 self.scaler_.transform( 723 Z[self.index_row_, :].reshape(n_row_sample, p) 724 ), 725 ) 726 # y is not subsampled 727 # classification 728 return (y, self.scaler_.transform(Z)) 729 730 def cook_test_set(self, X, **kwargs): 731 """Transform data from test set, with hidden layer. 
732 733 Parameters: 734 735 X: {array-like}, shape = [n_samples, n_features] 736 Training vectors, where n_samples is the number 737 of samples and n_features is the number of features 738 739 **kwargs: additional parameters to be passed to self.encode_cluster 740 741 Returns: 742 743 Transformed test set : {array-like} 744 """ 745 746 if isinstance(X, pd.DataFrame): 747 X = copy.deepcopy(X.values.astype(float)) 748 749 if len(X.shape) == 1: 750 X = X.reshape(1, -1) 751 752 if ( 753 self.n_clusters == 0 754 ): # data without clustering: self.n_clusters is None ----- 755 if self.n_hidden_features > 0: 756 # if hidden layer 757 scaled_X = ( 758 self.nn_scaler_.transform(X) 759 if (self.col_sample == 1) 760 else self.nn_scaler_.transform(X[:, self.index_col_]) 761 ) 762 Phi_X = self.create_layer(scaled_X, self.W_) 763 if self.direct_link: 764 return self.scaler_.transform( 765 mo.cbind(scaled_X, Phi_X, backend=self.backend) 766 ) 767 # when self.direct_link == False 768 return self.scaler_.transform(Phi_X) 769 # if no hidden layer # self.n_hidden_features == 0 770 return self.scaler_.transform(X) 771 772 # data with clustering: self.n_clusters > 0 ----- 773 if self.col_sample == 1: 774 predicted_clusters = self.encode_clusters( 775 X=X, predict=True, **kwargs 776 ) 777 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 778 else: 779 predicted_clusters = self.encode_clusters( 780 X=X[:, self.index_col_], predict=True, **kwargs 781 ) 782 augmented_X = mo.cbind( 783 X[:, self.index_col_], predicted_clusters, backend=self.backend 784 ) 785 786 if self.n_hidden_features > 0: # if hidden layer 787 scaled_X = self.nn_scaler_.transform(augmented_X) 788 Phi_X = self.create_layer(scaled_X, self.W_) 789 if self.direct_link: 790 return self.scaler_.transform( 791 mo.cbind(augmented_X, Phi_X, backend=self.backend) 792 ) 793 return self.scaler_.transform(Phi_X) 794 795 # if no hidden layer 796 return self.scaler_.transform(augmented_X) 797 798 def 
cook_training_set_jax(self, y=None, X=None, W=None, **kwargs): 799 """JAX-compatible version of cook_training_set that maintains side effects.""" 800 # Initialize random key 801 key = jax.random.PRNGKey(self.seed) 802 803 # Convert inputs to JAX arrays 804 X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_) 805 y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_) 806 807 # Handle column sampling 808 if self.col_sample < 1: 809 n_features = X.shape[1] 810 new_n_features = int(jnp.ceil(n_features * self.col_sample)) 811 assert new_n_features >= 1, "Invalid col_sample" 812 813 key, subkey = jax.random.split(key) 814 index_col = jax.random.choice( 815 subkey, n_features, shape=(new_n_features,), replace=False 816 ) 817 self.index_col_ = np.array( 818 index_col 819 ) # Store as numpy for original methods 820 input_X = X[:, index_col] 821 n_features = ( 822 new_n_features # Update n_features after column sampling 823 ) 824 else: 825 input_X = X 826 n_features = X.shape[1] 827 828 augmented_X = input_X 829 830 # JAX-compatible scaling 831 def jax_scale(data, mean=None, std=None): 832 if mean is None: 833 mean = jnp.mean(data, axis=0) 834 if std is None: 835 std = jnp.std(data, axis=0) 836 return (data - mean) / (std + 1e-10), mean, std 837 838 # Hidden layer processing 839 if self.n_hidden_features > 0: 840 # Initialize weights if not provided 841 if W is None: 842 shape = (n_features, self.n_hidden_features) 843 844 # JAX-compatible weight generation 845 if self.nodes_sim == "uniform": 846 key, subkey = jax.random.split(key) 847 W = jax.random.uniform( 848 subkey, shape=shape, minval=-1.0, maxval=1.0 849 ) * (1 / jnp.sqrt(n_features)) 850 else: 851 # For other sequences, use numpy generation then convert to JAX 852 if self.nodes_sim == "sobol": 853 W_np = generate_sobol( 854 n_dims=shape[0], 855 n_points=shape[1], 856 seed=self.seed, 857 ) 858 elif self.nodes_sim == "hammersley": 859 W_np = generate_hammersley( 860 n_dims=shape[0], 861 
n_points=shape[1], 862 seed=self.seed, 863 ) 864 elif self.nodes_sim == "halton": 865 W_np = generate_halton( 866 n_dims=shape[0], 867 n_points=shape[1], 868 seed=self.seed, 869 ) 870 else: # default to uniform 871 key, subkey = jax.random.split(key) 872 W = jax.random.uniform( 873 subkey, shape=shape, minval=-1.0, maxval=1.0 874 ) * (1 / jnp.sqrt(n_features)) 875 876 if self.nodes_sim in ["sobol", "hammersley", "halton"]: 877 W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features)) 878 879 self.W_ = np.array(W) # Store as numpy for original methods 880 881 # Scale features 882 scaled_X, self.nn_mean_, self.nn_std_ = jax_scale( 883 augmented_X, 884 getattr(self, "nn_mean_", None), 885 getattr(self, "nn_std_", None), 886 ) 887 888 # Create hidden layer with proper bias handling 889 linear_output = jnp.dot(scaled_X, W) 890 891 # Apply activation 892 if self.activation_name == "relu": 893 Phi_X = jax.nn.relu(linear_output) 894 elif self.activation_name == "tanh": 895 Phi_X = jnp.tanh(linear_output) 896 elif self.activation_name == "sigmoid": 897 Phi_X = jax.nn.sigmoid(linear_output) 898 else: # leaky relu 899 Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 900 901 # Apply dropout 902 if self.dropout > 0: 903 key, subkey = jax.random.split(key) 904 mask = jax.random.bernoulli( 905 subkey, p=1 - self.dropout, shape=Phi_X.shape 906 ) 907 Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0) 908 909 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 910 else: 911 Z = augmented_X 912 913 # Final scaling 914 scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale( 915 Z, 916 getattr(self, "scale_mean_", None), 917 getattr(self, "scale_std_", None), 918 ) 919 920 # Center response for regression 921 if not hasattr(mx, "is_factor") or not mx.is_factor( 922 y 923 ): # regression case 924 self.y_mean_ = float( 925 jnp.mean(y) 926 ) # Convert to Python float for compatibility 927 centered_y = y - self.y_mean_ 928 else: 929 centered_y = y 930 931 # Handle 
row sampling 932 if self.row_sample < 1: 933 key, subkey = jax.random.split(key) 934 n_samples = Z.shape[0] 935 n_row_sample = int(jnp.ceil(n_samples * self.row_sample)) 936 index_row = jax.random.choice( 937 subkey, n_samples, shape=(n_row_sample,), replace=False 938 ) 939 self.index_row_ = np.array( 940 index_row 941 ) # Store as numpy for original methods 942 return (centered_y[index_row], scaled_Z[index_row]) 943 944 return (centered_y, scaled_Z) 945 946 def cook_test_set_jax(self, X, **kwargs): 947 """JAX-compatible test set processing with matching dimension handling.""" 948 X = jnp.asarray(X) 949 950 if len(X.shape) == 1: 951 X = X.reshape(1, -1) 952 953 # Handle column sampling 954 input_X = ( 955 X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)] 956 ) 957 958 augmented_X = input_X 959 960 # JAX-compatible scaling 961 scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10) 962 963 # Process hidden layer if needed 964 if self.n_hidden_features > 0: 965 Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_)) 966 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 967 else: 968 Z = augmented_X 969 970 # Final scaling 971 scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10) 972 973 return scaled_Z 974 975 def _jax_create_layer(self, X, W): 976 """JAX-compatible hidden layer creation.""" 977 # print("X", X.shape) 978 # print("W", W.shape) 979 # print("self.W_", self.W_.shape) 980 linear_output = jnp.dot(X, W) 981 982 if self.activation_name == "relu": 983 return jax.nn.relu(linear_output) 984 elif self.activation_name == "tanh": 985 return jnp.tanh(linear_output) 986 elif self.activation_name == "sigmoid": 987 return jax.nn.sigmoid(linear_output) 988 else: # leaky relu 989 return jax.nn.leaky_relu(linear_output, negative_slope=self.a) 990 991 def cross_val_score( 992 self, 993 X, 994 y, 995 cv=5, 996 scoring="accuracy", 997 random_state=42, 998 n_jobs=-1, 999 epsilon=0.5, 1000 penalized=True, 1001 
objective="abs", 1002 **kwargs 1003 ): 1004 """ 1005 Penalized Cross-validation score for a model. 1006 1007 Parameters: 1008 1009 X: {array-like}, shape = [n_samples, n_features] 1010 Training vectors, where n_samples is the number 1011 of samples and n_features is the number of features 1012 1013 y: array-like, shape = [n_samples] 1014 Target values 1015 1016 X_test: {array-like}, shape = [n_samples, n_features] 1017 Test vectors, where n_samples is the number 1018 of samples and n_features is the number of features 1019 1020 y_test: array-like, shape = [n_samples] 1021 Target values 1022 1023 cv: int 1024 Number of folds 1025 1026 scoring: str 1027 Scoring metric 1028 1029 random_state: int 1030 Random state 1031 1032 n_jobs: int 1033 Number of jobs to run in parallel 1034 1035 epsilon: float 1036 Penalty parameter 1037 1038 penalized: bool 1039 Whether to obtain penalized cross-validation score or not 1040 1041 objective: str 1042 'abs': Minimize the absolute difference between cross-validation score and validation score 1043 'relative': Minimize the relative difference between cross-validation score and validation score 1044 Returns: 1045 1046 A namedtuple with the following fields: 1047 - cv_score: float 1048 cross-validation score 1049 - val_score: float 1050 validation score 1051 - penalized_score: float 1052 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 1053 If higher scoring metric is better, minimize the function result. 1054 If lower scoring metric is better, maximize the function result. 
1055 """ 1056 if scoring == "accuracy": 1057 scoring_func = accuracy_score 1058 elif scoring == "balanced_accuracy": 1059 scoring_func = balanced_accuracy_score 1060 elif scoring == "f1": 1061 scoring_func = f1_score 1062 elif scoring == "roc_auc": 1063 scoring_func = roc_auc_score 1064 elif scoring == "r2": 1065 scoring_func = r2_score 1066 elif scoring == "mse": 1067 scoring_func = mean_squared_error 1068 elif scoring == "mae": 1069 scoring_func = mean_absolute_error 1070 elif scoring == "mape": 1071 scoring_func = mean_absolute_percentage_error 1072 elif scoring == "rmse": 1073 1074 def scoring_func(y_true, y_pred): 1075 return np.sqrt(mean_squared_error(y_true, y_pred)) 1076 1077 X_train, X_val, y_train, y_val = train_test_split( 1078 X, y, test_size=0.2, random_state=random_state 1079 ) 1080 1081 res = cross_val_score( 1082 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 1083 ) # cross-validation error 1084 1085 if penalized == False: 1086 return res 1087 1088 DescribeResult = namedtuple( 1089 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 1090 ) 1091 1092 numerator = res.mean() 1093 1094 # Evaluate on the (cv+1)-th fold 1095 preds_val = self.fit(X_train, y_train).predict(X_val) 1096 try: 1097 denominator = scoring(y_val, preds_val) # validation error 1098 except Exception as e: 1099 denominator = scoring_func(y_val, preds_val) 1100 1101 # if higher is better 1102 if objective == "abs": 1103 penalized_score = np.abs(numerator - denominator) + epsilon * ( 1104 1 / denominator + 1 / numerator 1105 ) 1106 elif objective == "relative": 1107 ratio = numerator / denominator 1108 penalized_score = np.abs(ratio - 1) + epsilon * ( 1109 1 / denominator + 1 / numerator 1110 ) 1111 1112 return DescribeResult( 1113 cv_score=numerator, 1114 val_score=denominator, 1115 penalized_score=penalized_score, 1116 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates with kmeans or GMM clustering.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.
            When omitted, `self.X_` is used.

        predict: boolean
            is False on training set and True on test set

        scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
            if scaler has already been fitted on training data (online training), it can be passed here;
            when omitted, `self.clustering_scaler_` is used

        **kwargs:
            additional parameters to be passed to the
            clustering method

    Returns:

        Clusters' matrix as float16: one-hot encoded when
        `self.cluster_encode` is truthy, cluster labels otherwise: {array-like}

    """

    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    # a single observation is treated as a one-row matrix
    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    # The `scaler` argument used to be accepted but never read; honor it and
    # fall back on the scaler stored on the instance.
    if scaler is None:
        scaler = self.clustering_scaler_

    if predict is False:  # encode training set
        # scale input data before clustering
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=scaler
        )
        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )
    else:  # encode test set with the already fitted clustering object
        X_clustered = self.clustering_obj_.predict(scaler.transform(X))

    # single exit shared by both modes (was duplicated per branch)
    if self.cluster_encode:
        return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
            np.float16
        )

    return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded if `cluster_encode` is True, otherwise the cluster labels: {array-like}
def create_layer(self, scaled_X, W=None):
    """Create hidden layer.

    Parameters:

        scaled_X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        W: {array-like}, shape = [n_features, hidden_features]
            (or [n_features + 1, hidden_features] when the hidden layer has a bias term);
            if provided, constructs the hidden layer with W; otherwise computed
            internally and stored in `self.W_`

    Returns:

        Hidden layer matrix: {array-like}

    """

    n_features = scaled_X.shape[1]

    # Mirrors the original `self.bias is False` test: anything that is not
    # literally False enables the bias term.
    use_bias = self.bias is not False

    # With a bias term a column of ones is prepended to the inputs, so the
    # weights need one extra row.
    n_rows_W = n_features + 1 if use_bias else n_features

    if W is None:
        # any unrecognized `nodes_sim` falls back on the Halton sequence
        simulate_nodes = {
            "sobol": generate_sobol,
            "hammersley": generate_hammersley,
            "uniform": generate_uniform,
        }.get(self.nodes_sim, generate_halton)

        self.W_ = simulate_nodes(
            n_dims=n_rows_W,
            n_points=self.n_hidden_features,
            seed=self.seed,
        )
        W = self.W_

    # Previously only checked without bias; with bias a mis-shaped W failed
    # later inside the matrix product.
    assert (
        W.shape[0] == n_rows_W
    ), "check dimensions of covariates X and matrix W"

    if use_bias:
        layer_input = mo.cbind(
            np.ones(scaled_X.shape[0]),
            scaled_X,
            backend=self.backend,
        )
    else:
        layer_input = scaled_X

    return mo.dropout(
        x=self.activation_func(
            mo.safe_sparse_dot(a=layer_input, b=W, backend=self.backend)
        ),
        drop_prob=self.dropout,
        seed=self.seed,
    )
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features] ([n_features + 1, hidden_features] when `bias` is True)
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for training set, with hidden layer, center the response.

    Parameters:

        y: array-like, shape = [n_samples]
            Target values; when omitted, `self.y_` is used

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features;
            when omitted, `self.X_` is used

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer via W

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        (centered response, direct link + hidden layer matrix): {tuple}
        For a factor response (classification) the response is returned
        as is, without centering.

    Side effects: depending on the configuration, sets `index_col_`,
    `nn_scaler_`, `scaler_`, `y_mean_`, `subsampler_` and `index_row_`.

    """

    has_hidden_layer = self.n_hidden_features > 0

    if has_hidden_layer:
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    # Resolve the data once. The response used to be resolved only in some
    # branches, so omitting `y` reached `mx.is_factor(None)` / `None[...]`.
    if X is None:
        X = self.X_
    elif isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    target = self.y_ if y is None else y

    # Column subsampling -----
    if self.col_sample == 1:
        input_X = X
    else:
        n_features = X.shape[1]
        new_n_features = int(np.ceil(n_features * self.col_sample))
        assert (
            new_n_features >= 1
        ), "check class attribute 'col_sample' and the number of covariates provided for X"
        np.random.seed(self.seed)
        self.index_col_ = np.random.choice(
            range(n_features), size=new_n_features, replace=False
        )
        input_X = X[:, self.index_col_]

    # Optional clustering covariates -----
    if self.n_clusters <= 0:
        augmented_X = input_X
    else:
        augmented_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

    # Optional hidden layer -----
    if has_hidden_layer:
        self.nn_scaler_, scaled_X = mo.scale_covariates(
            augmented_X,
            choice=self.type_scaling[1],
            scaler=self.nn_scaler_,
        )
        Phi_X = self.create_layer(scaled_X, W=W)
        Z = (
            mo.cbind(augmented_X, Phi_X, backend=self.backend)
            if self.direct_link is True
            else Phi_X
        )
    else:
        Z = augmented_X

    # Only the fitted scaler is kept here; the returned matrix is produced
    # by `self.scaler_.transform` below, as before.
    self.scaler_, _ = mo.scale_covariates(
        Z, choice=self.type_scaling[0], scaler=self.scaler_
    )

    # Response: centered for regression, untouched for classification -----
    if mx.is_factor(target) is False:
        self.y_mean_, response = mo.center_response(target)
    else:
        response = target

    # Row subsampling -----
    if self.row_sample < 1:
        n_cols = Z.shape[1]

        self.subsampler_ = SubSampler(
            y=target, row_sample=self.row_sample, seed=self.seed
        )
        self.index_row_ = self.subsampler_.subsample()
        n_row_sample = len(self.index_row_)

        return (
            response[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(
                Z[self.index_row_, :].reshape(n_row_sample, n_cols)
            ),
        )

    return (response, self.scaler_.transform(Z))
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
    """Transform data from test set, with hidden layer.

    Applies to X the transformations fitted on the training set: column
    selection, cluster encoding, hidden layer and final scaling.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        Transformed test set : {array-like}
    """

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    # a single observation is treated as a one-row matrix
    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    # Keep the columns sampled at training time. The no-hidden-layer path
    # used to skip this and scale the full matrix.
    input_X = X if self.col_sample == 1 else X[:, self.index_col_]

    # `<= 0` matches the test used when cooking the training set
    # (this method used `== 0`).
    if self.n_clusters <= 0:
        augmented_X = input_X
    else:
        augmented_X = mo.cbind(
            input_X,
            self.encode_clusters(X=input_X, predict=True, **kwargs),
            backend=self.backend,
        )

    if self.n_hidden_features > 0:  # if hidden layer
        scaled_X = self.nn_scaler_.transform(augmented_X)
        Phi_X = self.create_layer(scaled_X, self.W_)
        if self.direct_link:
            # The direct link carries the unscaled inputs, as in the training
            # set; the no-cluster path used to pass `scaled_X` here, which
            # `self.scaler_` was not fitted on.
            return self.scaler_.transform(
                mo.cbind(augmented_X, Phi_X, backend=self.backend)
            )
        return self.scaler_.transform(Phi_X)

    # if no hidden layer
    return self.scaler_.transform(augmented_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set : {array-like}
class BaseRegressor(Base, RegressorMixin):
    """Random Vector Functional Link Network regression without shrinkage

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or
            not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for type_clust='kmeans' or type_clust='gmm'
            clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of features randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform', clustering and dropout

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: vector
            regression coefficients

        GCV_: float
            Generalized Cross-Validation error

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # every hyperparameter is forwarded unchanged to the parent class
        base_params = dict(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        super().__init__(**base_params)

    def fit(self, X, y, **kwargs):
        """Fit BaseRegressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:

            self: object
        """

        # response and design matrix as prepared by the parent class
        response, design = self.cook_training_set(y=y, X=X, **kwargs)

        estimates = lmf.beta_Sigma_hat(
            X=design, y=response, backend=self.backend
        )

        self.beta_ = estimates["beta_hat"]
        self.GCV_ = estimates["GCV"]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-D array is treated as a single observation

            **kwargs: additional parameters to be passed to self.cook_test_set

        Returns:

            model predictions: {array-like}; the first element only when
            X is a single (1-D) observation
        """

        single_obs = len(X.shape) == 1

        if single_obs:
            n_features = X.shape[0]
            # the observation is stacked on a dummy row of ones and only the
            # first prediction is kept below
            X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

        preds = self.y_mean_ + mo.safe_sparse_dot(
            a=self.cook_test_set(X, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )

        return preds[0] if single_obs else preds
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
    """Fit BaseRegressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to self.cook_training_set

    Returns:

        self: object
    """

    # response and design matrix as prepared by cook_training_set
    response, design = self.cook_training_set(y=y, X=X, **kwargs)

    estimates = lmf.beta_Sigma_hat(X=design, y=response, backend=self.backend)

    self.beta_ = estimates["beta_hat"]
    self.GCV_ = estimates["GCV"]

    return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-D array is treated as a single observation

        **kwargs: additional parameters to be passed to self.cook_test_set

    Returns:

        model predictions: {array-like}; the first element only when
        X is a single (1-D) observation
    """

    single_obs = len(X.shape) == 1

    if single_obs:
        n_features = X.shape[0]
        # the observation is stacked on a dummy row of ones and only the
        # first prediction is kept below
        X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

    preds = self.y_mean_ + mo.safe_sparse_dot(
        a=self.cook_test_set(X, **kwargs),
        b=self.beta_,
        backend=self.backend,
    )

    return preds[0] if single_obs else preds
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFLRegressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with one prior

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s: float
            std. dev. of regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, the covariance of the fitted parameters is computed in
            `fit` (stored in `Sigma_`) so that uncertainty around predictions
            can be evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )
        self.s = s
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFLRegressor to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        fit_obj = lmf.beta_Sigma_hat_rvfl(
            X=scaled_Z,
            y=centered_y,
            s=self.s,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        # the covariance is only requested (and read back) when asked for
        if self.return_std:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-D array is treated as a single observation.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, or a tuple
            (predictions, standard deviations) when `return_std` is truthy

        """

        # NOTE: the `return_std` argument is deliberately NOT written back to
        # `self.return_std`. Doing so changed a constructor hyperparameter at
        # prediction time, so that a later `fit` stopped computing `Sigma_`.

        single_obs = len(X.shape) == 1  # one observation in the test set only

        if single_obs:
            n_features = X.shape[0]
            # the observation is stacked on a dummy row of ones and only the
            # first prediction is kept below
            X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

        Z = self.cook_test_set(X, **kwargs)

        if not return_std:
            preds = self.y_mean_ + mo.safe_sparse_dot(
                a=Z,
                b=self.beta_,
                backend=self.backend,
            )
            return preds[0] if single_obs else preds

        # uncertainty around predictions is required
        pred_obj = lmf.beta_Sigma_hat_rvfl(
            s=self.s,
            sigma=self.sigma,
            X_star=Z,
            return_cov=True,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFLRegressor to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object

    """
    # cook_training_set hands back the response and the design matrix
    # that are fed to the Bayesian ridge solver below.
    y_centered, Z_scaled = self.cook_training_set(y=y, X=X, **kwargs)

    posterior = lmf.beta_Sigma_hat_rvfl(
        X=Z_scaled,
        y=y_centered,
        s=self.s,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = posterior["beta_hat"]
    self.GCV_ = posterior["GCV"]

    # The covariance entry is only read when it was requested at fit time.
    if self.return_std == True:
        self.Sigma_ = posterior["Sigma_hat"]

    return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        return_std: {boolean}, standard dev. is returned or not

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}; a tuple (predictions, standard
        deviations) when `return_std` is not False

    """
    single_obs = len(X.shape) == 1

    if single_obs:
        # A lone observation is stacked on top of a row of ones so that
        # cook_test_set receives a 2-row matrix; only row 0 is returned.
        n_features = X.shape[0]
        design = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )
    else:
        design = X

    # NOTE: the flag is stored on the estimator, as the original code did.
    self.return_std = return_std

    if self.return_std == False:
        offset = self.y_mean_
        point_preds = offset + mo.safe_sparse_dot(
            a=self.cook_test_set(design, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )
        return point_preds[0] if single_obs else point_preds

    # Predictions together with their standard deviations.
    Z = self.cook_test_set(design, **kwargs)

    pred_obj = lmf.beta_Sigma_hat_rvfl(
        s=self.s,
        sigma=self.sigma,
        X_star=Z,
        return_cov=True,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_obs:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFL2Regressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with two priors

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in the model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s1: float
            std. dev. of init. regression parameters in Bayesian Ridge Regression

        s2: float
            std. dev. of augmented regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, uncertainty around predictions is evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=0,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s1=0.1,
        s2=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        self.s1 = s1
        self.s2 = s2
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFL2Regressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        q = self.n_hidden_features

        if self.direct_link == True:
            # Number of direct-link columns, read from the design matrix.
            # The previous `p + n_clusters` is wrong when
            # cluster_encode=False, because the clusters then contribute
            # a single label column.
            # NOTE(review): assumes the hidden-layer features are the last
            # q columns of scaled_Z, as the block layout below already did.
            r = scaled_Z.shape[1] - q

            block11 = (self.s1**2) * np.eye(r)
            block12 = np.zeros((r, q))
            block21 = np.zeros((q, r))
            block22 = (self.s2**2) * np.eye(q)

            # Block-diagonal prior: s1 on direct-link coefficients,
            # s2 on hidden-layer coefficients.
            Sigma_prior = mo.rbind(
                x=mo.cbind(x=block11, y=block12, backend=self.backend),
                y=mo.cbind(x=block21, y=block22, backend=self.backend),
                backend=self.backend,
            )

        else:
            Sigma_prior = (self.s2**2) * np.eye(q)

        fit_obj = lmf.beta_Sigma_hat_rvfl2(
            X=scaled_Z,
            y=centered_y,
            Sigma=Sigma_prior,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        if self.return_std == True:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}; a tuple (predictions, standard
            deviations) when `return_std` is not False

        """

        single_obs = len(X.shape) == 1

        if single_obs:
            # A lone observation is stacked on a row of ones so that
            # cook_test_set receives a 2-row matrix; only row 0 is returned.
            n_features = X.shape[0]
            design = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )
        else:
            design = X

        # NOTE: the flag is stored on the estimator, as before.
        self.return_std = return_std

        if self.return_std == False:
            offset = self.y_mean_
            point_preds = offset + mo.safe_sparse_dot(
                self.cook_test_set(design, **kwargs),
                self.beta_,
                backend=self.backend,
            )
            return point_preds[0] if single_obs else point_preds

        Z = self.cook_test_set(design, **kwargs)

        pred_obj = lmf.beta_Sigma_hat_rvfl2(
            X_star=Z,
            return_cov=self.return_std,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFL2Regressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    q = self.n_hidden_features

    if self.direct_link == True:
        # Number of direct-link columns, read from the design matrix.
        # The previous `p + n_clusters` is wrong when cluster_encode=False,
        # because the clusters then contribute a single label column.
        # NOTE(review): assumes the hidden-layer features are the last
        # q columns of scaled_Z, as the block layout below already did.
        r = scaled_Z.shape[1] - q

        block11 = (self.s1**2) * np.eye(r)
        block12 = np.zeros((r, q))
        block21 = np.zeros((q, r))
        block22 = (self.s2**2) * np.eye(q)

        # Block-diagonal prior: s1 on direct-link coefficients,
        # s2 on hidden-layer coefficients.
        Sigma_prior = mo.rbind(
            x=mo.cbind(x=block11, y=block12, backend=self.backend),
            y=mo.cbind(x=block21, y=block22, backend=self.backend),
            backend=self.backend,
        )

    else:
        Sigma_prior = (self.s2**2) * np.eye(q)

    fit_obj = lmf.beta_Sigma_hat_rvfl2(
        X=scaled_Z,
        y=centered_y,
        Sigma=Sigma_prior,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = fit_obj["beta_hat"]

    if self.return_std == True:
        self.Sigma_ = fit_obj["Sigma_hat"]

    self.GCV_ = fit_obj["GCV"]

    return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        return_std: {boolean}, standard dev. is returned or not

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}; a tuple (predictions, standard
        deviations) when `return_std` is not False

    """
    single_obs = len(X.shape) == 1

    if single_obs:
        # A lone observation is stacked on a row of ones so that
        # cook_test_set receives a 2-row matrix; only row 0 is returned.
        n_features = X.shape[0]
        design = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )
    else:
        design = X

    # NOTE: the flag is stored on the estimator, as the original code did.
    self.return_std = return_std

    if self.return_std == False:
        offset = self.y_mean_
        point_preds = offset + mo.safe_sparse_dot(
            self.cook_test_set(design, **kwargs),
            self.beta_,
            backend=self.backend,
        )
        return point_preds[0] if single_obs else point_preds

    # Predictions together with their standard deviations.
    Z = self.cook_test_set(design, **kwargs)

    pred_obj = lmf.beta_Sigma_hat_rvfl2(
        X_star=Z,
        return_cov=self.return_std,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_obs:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class ClassicalMTS(MTS):
    """Time series with statistical models (statsmodels), mostly for benchmarks

    Parameters:

        model: type of model: str.
            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
            Default is 'VAR'

        obj: object
            A time series model from statsmodels. It is used in `fit` as
            `obj(X, **kwargs).fit()`. When provided, `model` is set to None.

    Attributes:

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

        level_: int
            level of confidence for prediction intervals (default is 95)

    Examples:
        See examples/classical_mts_timeseries.py
    """

    # construct the object -----

    def __init__(self, model="VAR", obj=None):
        if obj is not None:
            self.model = None
            self.obj = obj
        else:
            self.model = model
            if self.model == "VAR":
                self.obj = VAR
            elif self.model == "VECM":
                self.obj = VECM
            elif self.model == "ARIMA":
                self.obj = ARIMA
            elif self.model == "ETS":
                self.obj = ExponentialSmoothing
            elif self.model == "Theta":
                self.obj = ThetaModel
            else:
                raise ValueError("model not recognized")
        self.n_series = None
        self.replications = None
        self.mean_ = None
        self.upper_ = None
        self.lower_ = None
        self.output_dates_ = None
        self.alpha_ = None
        self.df_ = None
        self.residuals_ = []
        self.sims_ = None
        self.level_ = None

    def fit(self, X, **kwargs):
        """Fit ClassicalMTS model to training data X

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training time series, where n_samples is the number
                of samples and n_features is the number of features;
                X must be in increasing order (most recent observations last)

            **kwargs: additional parameters forwarded to the underlying
                model's constructor

        Returns:

            self: object
        """

        try:
            self.n_series = X.shape[1]
        except Exception:
            self.n_series = 1

        if not isinstance(X, (pd.DataFrame, pd.Series)):
            # Input is not a pandas object (e.g. a numpy array).
            X = pd.DataFrame(X)
            # Always a list: assigning a bare string such as "series0" to
            # DataFrame.columns raises, which broke univariate arrays.
            self.series_names = ["series" + str(i) for i in range(X.shape[1])]

        else:  # input data set is a DataFrame or Series with column names
            X_index = None
            if X.index is not None and len(X.shape) > 1:
                X_index = X.index
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
            if X_index is not None:
                try:
                    X.index = X_index
                except Exception:
                    # best effort: keep whatever index the conversion gave
                    pass
            if isinstance(X, pd.DataFrame):
                self.series_names = X.columns.tolist()
            else:
                self.series_names = X.name

        if isinstance(X, (pd.DataFrame, pd.Series)):
            self.df_ = X
            X = X.values
            self.df_.columns = self.series_names
            self.input_dates = ts.compute_input_dates(self.df_)
        else:
            self.df_ = pd.DataFrame(X, columns=self.series_names)

        # `self.obj` holds the model constructor before the first fit and
        # the fitted results afterwards. Remember the constructor so that
        # refitting (as `score` and `cross_val_score` do) does not try to
        # call a fitted results object.
        if callable(self.obj):
            model_class = self.obj
        else:
            model_class = getattr(self, "_model_class", self.obj)
        self._model_class = model_class

        if self.model == "Theta":
            try:
                self.obj = model_class(self.df_, **kwargs).fit()
            except Exception:
                self.obj = model_class(self.df_.values, **kwargs).fit()
            self.residuals_ = None
        else:
            self.obj = model_class(X, **kwargs).fit()
            try:
                self.residuals_ = self.obj.resid
            except Exception:  # fitted object exposes no residuals
                self.residuals_ = None

        return self

    def predict(self, h=5, level=95, **kwargs):
        """Forecast all the time series, h steps ahead

        Parameters:

            h: {integer}
                Forecasting horizon

            level: {integer}
                Level of confidence for the prediction intervals

            **kwargs: additional parameters forwarded to the fitted model's
                interval method (`forecast_interval`, `prediction_intervals`)

        Returns:

            A namedtuple `DescribeResult(mean, lower, upper)` for
            horizon = h

        """

        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
        self.lower_ = None  # do not remove (/!\)
        self.upper_ = None  # do not remove (/!\)
        self.sims_ = None  # do not remove (/!\)
        self.level_ = level
        self.alpha_ = 100 - level

        alpha = self.alpha_ / 100
        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

        # Named tuple for forecast results
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        # One forecasting strategy per kind of fitted object. Each returns
        # (mean, lower, upper) or raises if the fitted object does not
        # support that API.

        def _var_like():
            mean, lower, upper = self.obj.forecast_interval(
                self.obj.endog, steps=h, alpha=alpha, **kwargs
            )
            return mean, lower, upper

        def _vecm_like():
            forecast = self.obj.predict(steps=h)
            lower, upper = self._compute_confidence_intervals(
                forecast, alpha=alpha, **kwargs
            )
            return forecast, lower, upper

        def _arima_like():
            forecast = self.obj.get_forecast(steps=h)
            # Pass alpha so that `level` is honoured; the default
            # conf_int() is always a 95% interval.
            bounds = forecast.conf_int(alpha=alpha)
            return forecast.predicted_mean, bounds[:, 0], bounds[:, 1]

        def _ets_like():
            forecast = self.obj.forecast(steps=h)
            std_errors = np.std(self.obj.resid)
            return (
                forecast,
                forecast - pi_multiplier * std_errors,
                forecast + pi_multiplier * std_errors,
            )

        def _theta_like_values():
            mean = self.obj.forecast(steps=h).values
            intervals = self.obj.prediction_intervals(
                steps=h, alpha=alpha, **kwargs
            )
            return mean, intervals["lower"].values, intervals["upper"].values

        def _theta_like_raw():
            mean = self.obj.forecast(steps=h)
            intervals = self.obj.prediction_intervals(
                steps=h, alpha=alpha, **kwargs
            )
            return mean, intervals["lower"], intervals["upper"]

        # Strategies are tried in the original order; the last one is not
        # guarded, so its exception propagates when nothing works.
        # `self.obj` is never None after construction, so the former
        # per-model `else` branch was unreachable and has been removed.
        for strategy in (
            _var_like,
            _vecm_like,
            _arima_like,
            _ets_like,
            _theta_like_values,
        ):
            try:
                mean_forecast, lower_bound, upper_bound = strategy()
                break
            except Exception:
                continue
        else:
            mean_forecast, lower_bound, upper_bound = _theta_like_raw()

        try:
            self.mean_ = pd.DataFrame(
                mean_forecast,
                columns=self.series_names,
                index=self.output_dates_,
            )
            self.lower_ = pd.DataFrame(
                lower_bound, columns=self.series_names, index=self.output_dates_
            )
            self.upper_ = pd.DataFrame(
                upper_bound, columns=self.series_names, index=self.output_dates_
            )
        except Exception:
            # Univariate fallback. A Series name must be hashable, so
            # unwrap a one-element list of names.
            name = self.series_names
            if isinstance(name, list) and len(name) == 1:
                name = name[0]
            self.mean_ = pd.Series(
                mean_forecast, name=name, index=self.output_dates_
            )
            self.lower_ = pd.Series(
                lower_bound, name=name, index=self.output_dates_
            )
            self.upper_ = pd.Series(
                upper_bound, name=name, index=self.output_dates_
            )

        return DescribeResult(
            mean=self.mean_, lower=self.lower_, upper=self.upper_
        )

    def _compute_confidence_intervals(self, forecast_result, alpha, **kwargs):
        """
        Compute confidence intervals for VECM forecasts.
        Uses the covariance of residuals to approximate the confidence intervals.

        `**kwargs` is accepted and ignored, because `predict` forwards its
        own keyword arguments to this helper.
        """
        residuals = self.obj.resid
        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors

        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
        lower_bound = forecast_result - z_value * std_errors
        upper_bound = forecast_result + z_value * std_errors

        return lower_bound, upper_bound

    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
        """Train on training_index, score on testing_index.

        Parameters:

            X: array-like
                the full time series (indexed positionally)

            training_index, testing_index: sequences of int
                non-overlapping row positions

            scoring: str
                one of the metric names checked below; default is
                'neg_root_mean_squared_error'

            **kwargs: forwarded to both `fit` and `predict`

        Returns:

            the value of the chosen metric between the held-out
            observations and the mean forecast
        """

        assert (
            bool(set(training_index).intersection(set(testing_index))) == False
        ), "Non-overlapping 'training_index' and 'testing_index' required"

        # Dimensions
        try:
            # multivariate time series
            n, p = X.shape
        except Exception:
            # univariate time series
            n = X.shape[0]
            p = 1

        # Training and testing sets
        if p > 1:
            X_train = X[training_index, :]
            X_test = X[testing_index, :]
        else:
            X_train = X[training_index]
            X_test = X[testing_index]

        # Horizon
        h = len(testing_index)
        assert (
            len(training_index) + h
        ) <= n, "Please check lengths of training and testing windows"

        # Fit and predict
        self.fit(X_train, **kwargs)
        preds = self.predict(h=h, **kwargs)

        if scoring is None:
            scoring = "neg_root_mean_squared_error"

        # check inputs
        assert scoring in (
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_root_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
            "r2",
        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
                           'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
                           'neg_median_absolute_error', 'r2')"

        scoring_options = {
            "explained_variance": skm2.explained_variance_score,
            "neg_mean_absolute_error": skm2.mean_absolute_error,
            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
                np.mean((x - y) ** 2)
            ),
            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
            "neg_median_absolute_error": skm2.median_absolute_error,
            "r2": skm2.r2_score,
        }

        # `predict` returns a (mean, lower, upper) namedtuple. Only the
        # mean forecast is compared with the held-out data, reshaped to
        # the layout of X_test.
        point_forecast = np.asarray(preds.mean).reshape(np.shape(X_test))

        return scoring_options[scoring](X_test, point_forecast)

    def plot(self, series=None, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

            series: {integer} or {string}
                series index or name

            type_axis: {string}
                "dates" (default) or "numeric" x-axis

            type_plot: {string}
                "pi" (prediction intervals, default) or "spaghetti"
                (simulated paths)

        """

        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with predict)"

        if series is None:
            assert (
                self.n_series == 1
            ), "please specify series index or name (n_series > 1)"
            series = 0

        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.df_.columns.get_loc(series)
        else:
            assert isinstance(series, int) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        if isinstance(self.df_, pd.DataFrame):
            y_test = list(self.mean_.iloc[:, series_idx])
            y_all = list(self.df_.iloc[:, series_idx]) + y_test
        else:
            y_test = list(self.mean_.values)
            y_all = list(self.df_.values) + y_test
        n_points_all = len(y_all)
        n_points_train = self.df_.shape[0]

        if type_axis == "numeric":
            x_all = list(range(n_points_all))
            x_test = list(range(n_points_train, n_points_all))

        if type_axis == "dates":  # use dates
            x_all = np.concatenate(
                (self.input_dates.values, self.output_dates_.values), axis=None
            )
            x_test = self.output_dates_.values

        # Shared pieces of every title
        target = f"{series}" if self.n_series > 1 else "input time series"
        title_style = dict(
            loc="left", fontsize=12, fontweight=0, color="black"
        )

        if type_plot == "pi":
            fig, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            try:
                ax.fill_between(
                    x_test,
                    self.lower_.iloc[:, series_idx],
                    self.upper_.iloc[:, series_idx],
                    alpha=0.2,
                    color="orange",
                )
            except Exception:
                # bounds stored as a Series (univariate case)
                ax.fill_between(
                    x_test,
                    self.lower_.values,
                    self.upper_.values,
                    alpha=0.2,
                    color="orange",
                )
            if self.replications is None:
                plt.title(f"prediction intervals for {target}", **title_style)
            else:
                plt.title(
                    f"prediction intervals for {self.replications} simulations of {target}",
                    **title_style,
                )
            plt.show()

        if type_plot == "spaghetti":
            palette = plt.get_cmap("Set1")
            sims_ix = getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    color=palette(col_ix),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            plt.title(
                f"{self.replications} simulations of {target}", **title_style
            )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()

    def cross_val_score(
        self,
        X,
        scoring="root_mean_squared_error",
        n_jobs=None,
        verbose=0,
        xreg=None,
        initial_window=5,
        horizon=3,
        fixed_window=False,
        show_progress=True,
        level=95,
        **kwargs,
    ):
        """Evaluate a score by time series cross-validation.

        Parameters:

            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                The data to fit.

            scoring: str or a function
                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
                'mean_absolute_error', 'mean_percentage_error',
                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`

            n_jobs: int, default=None
                Number of jobs to run in parallel (currently unused by
                this method).

            verbose: int, default=0
                The verbosity level.

            xreg: array-like, optional (default=None)
                Additional (external) regressors to be passed to `fit`;
                only forwarded when not None.
                xreg must be in 'increasing' order (most recent observations last)

            initial_window: int
                initial number of consecutive values in each training set sample

            horizon: int
                number of consecutive values in test set sample

            fixed_window: boolean
                if False, all training samples start at index 0, and the training
                window's size is increasing.
                if True, the training window's size is fixed, and the window is
                rolling forward

            show_progress: boolean
                if True, a progress bar is printed

            level: int
                level of confidence for the prediction intervals

            **kwargs: dict
                additional parameters to be passed to `fit` and `predict`

        Returns:

            A tuple: raw errors (one entry per fold) and their
            descriptive statistics

        """
        tscv = TimeSeriesSplit()

        tscv_obj = tscv.split(
            X,
            initial_window=initial_window,
            horizon=horizon,
            fixed_window=fixed_window,
        )

        if isinstance(scoring, str):
            assert scoring in (
                "root_mean_squared_error",
                "mean_squared_error",
                "mean_error",
                "mean_absolute_error",
                "mean_percentage_error",
                "mean_absolute_percentage_error",
                "winkler_score",
                "coverage",
            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"

            def err_func(X_test, X_pred, scoring):
                # `predict` always returns a (mean, lower, upper) result
                # here, so forecasts are always treated as probabilistic.
                # The former test on `self.type_pi` read an attribute this
                # class's constructor never sets.
                if scoring == "winkler_score":
                    return winkler_score(X_pred, X_test, level=level)
                if scoring == "coverage":
                    return coverage(X_pred, X_test, level=level)
                return mean_errors(
                    pred=X_pred.mean, actual=X_test, scoring=scoring
                )

        else:  # isinstance(scoring, str) = False
            err_func = scoring

        # The underlying statsmodels constructors reject an `xreg`
        # keyword, so forward it only when the caller supplied one.
        fit_kwargs = dict(kwargs)
        if xreg is not None:
            fit_kwargs["xreg"] = xreg

        # Materialise the splits so the progress bar knows their count.
        splits = [
            (train_index, test_index) for train_index, test_index in tscv_obj
        ]

        if show_progress is True:
            iterator = tqdm(splits, total=len(splits))
        else:
            iterator = splits

        errors = []

        for train_index, test_index in iterator:
            if verbose == 1:
                print(f"TRAIN: {train_index}")
                print(f"TEST: {test_index}")

            if isinstance(X, pd.DataFrame):
                self.fit(X.iloc[train_index, :], **fit_kwargs)
                X_test = X.iloc[test_index, :]
            else:
                self.fit(X[train_index, :], **fit_kwargs)
                X_test = X[test_index, :]
            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)

            errors.append(err_func(X_test, X_pred, scoring))

        res = np.asarray(errors)

        return res, describe(res)
Time series with statistical models (statsmodels), mostly for benchmarks
Parameters:
model: type of model: str.
currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Default is None
obj: object
A time series model from statsmodels
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
def fit(self, X, **kwargs):
    """Fit ClassicalMTS model to training data X

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        **kwargs: additional parameters forwarded to the constructor of
            the underlying model (`self.obj`)

    Returns:

        self: object
    """

    try:
        self.n_series = X.shape[1]
    except Exception:
        # 1-D input (or an object without a 2-D shape): one series
        self.n_series = 1

    if not isinstance(X, (pd.DataFrame, pd.Series)):
        # input data set is array-like without column names
        X = pd.DataFrame(X)
        if self.n_series > 1:
            self.series_names = [f"series{i}" for i in range(X.shape[1])]
        else:
            # kept as a plain string: `predict` relies on this to build a
            # Series (rather than a DataFrame) for univariate forecasts
            self.series_names = "series0"
    else:
        # input data set is a DataFrame or Series with column names
        X_index = None
        if X.index is not None and len(X.shape) > 1:
            X_index = X.index
        X = copy.deepcopy(mo.convert_df_to_numeric(X))
        if X_index is not None:
            try:
                X.index = X_index
            except Exception:
                # best effort: keep the converted frame's own index
                pass
        if isinstance(X, pd.DataFrame):
            self.series_names = X.columns.tolist()
        else:
            self.series_names = X.name

    # At this point X is always a pandas object
    self.df_ = X
    X = X.values
    if isinstance(self.df_, pd.DataFrame):
        names = self.series_names
        # a bare string is not a valid column index: wrap it in a list
        self.df_.columns = [names] if isinstance(names, str) else names
    else:
        # Series: same attribute assignment as before (not an axis)
        self.df_.columns = self.series_names
    self.input_dates = ts.compute_input_dates(self.df_)

    # `self.obj` is replaced below by the fitted results; remember the
    # model constructor so that `fit` can be called again (e.g. in
    # cross-validation loops)
    if callable(self.obj):
        self._model_factory = self.obj
    factory = getattr(self, "_model_factory", self.obj)

    if self.model == "Theta":
        try:
            self.obj = factory(self.df_, **kwargs).fit()
        except Exception:
            # retry with the raw values when the pandas input is rejected
            self.obj = factory(self.df_.values, **kwargs).fit()
        self.residuals_ = None
    else:
        self.obj = factory(X, **kwargs).fit()
        try:
            self.residuals_ = self.obj.resid
        except Exception:
            # fitted results without residuals
            self.residuals_ = None

    return self
Fit ClassicalMTS model to training data X
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional keyword arguments forwarded to the constructor of the underlying model (`self.obj`)
Returns:
self: object
def predict(self, h=5, level=95, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: {integer}
            Forecasting horizon

        level: {integer}
            Confidence level (in %) of the prediction intervals

        **kwargs: additional parameters forwarded to the underlying
            model's interval computation (`forecast_interval`,
            `_compute_confidence_intervals` or `prediction_intervals`)

    Returns:

        A namedtuple `DescribeResult` with fields `mean`, `lower` and
        `upper` (forecasts and prediction bounds for horizon = h)

    """

    self.output_dates_, _frequency = ts.compute_output_dates(self.df_, h)
    self.level_ = level
    self.alpha_ = 100 - level
    self.lower_ = None  # do not remove (/!\)
    self.upper_ = None  # do not remove (/!\)
    self.sims_ = None  # do not remove (/!\)

    alpha = self.alpha_ / 100
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    # Named tuple for forecast results
    DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))

    # One forecasting strategy per family of fitted objects; each returns
    # the triple (mean_forecast, lower_bound, upper_bound).

    def _forecast_interval():  # VAR-like results
        mean, lower, upper = self.obj.forecast_interval(
            self.obj.endog, steps=h, alpha=alpha, **kwargs
        )
        return mean, lower, upper

    def _predict_with_ci():  # VECM-like results
        mean = self.obj.predict(steps=h)
        lower, upper = self._compute_confidence_intervals(
            mean, alpha=alpha, **kwargs
        )
        return mean, lower, upper

    def _get_forecast():  # ARIMA-like results
        forecast_result = self.obj.get_forecast(steps=h)
        mean = forecast_result.predicted_mean
        lower = forecast_result.conf_int()[:, 0]
        upper = forecast_result.conf_int()[:, 1]
        return mean, lower, upper

    def _forecast_with_residuals():  # ETS-like results
        mean = self.obj.forecast(steps=h)
        std_errors = np.std(self.obj.resid)
        return (
            mean,
            mean - pi_multiplier * std_errors,
            mean + pi_multiplier * std_errors,
        )

    def _prediction_intervals():  # Theta-like results
        try:
            mean = self.obj.forecast(steps=h).values
            bands = self.obj.prediction_intervals(
                steps=h, alpha=alpha, **kwargs
            )
            return mean, bands["lower"].values, bands["upper"].values
        except Exception:
            # outputs that are not pandas objects (no `.values`)
            mean = self.obj.forecast(steps=h)
            bands = self.obj.prediction_intervals(
                steps=h, alpha=alpha, **kwargs
            )
            return mean, bands["lower"], bands["upper"]

    if self.obj is not None:
        # Try each strategy in turn; the first one that works wins. The
        # last strategy is not guarded, so its error reaches the caller.
        for strategy in (
            _forecast_interval,
            _predict_with_ci,
            _get_forecast,
            _forecast_with_residuals,
        ):
            try:
                mean_forecast, lower_bound, upper_bound = strategy()
                break
            except Exception:
                continue
        else:
            mean_forecast, lower_bound, upper_bound = _prediction_intervals()
    else:
        by_model = {
            "VAR": _forecast_interval,
            "VECM": _predict_with_ci,
            "ARIMA": _get_forecast,
            "ETS": _forecast_with_residuals,
            "Theta": _prediction_intervals,
        }
        strategy = by_model.get(self.model)
        if strategy is None:
            raise ValueError("model not recognized")
        mean_forecast, lower_bound, upper_bound = strategy()

    try:
        self.mean_ = pd.DataFrame(
            mean_forecast,
            columns=self.series_names,
            index=self.output_dates_,
        )
        self.lower_ = pd.DataFrame(
            lower_bound, columns=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.DataFrame(
            upper_bound, columns=self.series_names, index=self.output_dates_
        )
    except Exception:
        # DataFrame construction failed (e.g. `series_names` is a single
        # name rather than a list): fall back to Series
        self.mean_ = pd.Series(
            mean_forecast, name=self.series_names, index=self.output_dates_
        )
        self.lower_ = pd.Series(
            lower_bound, name=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.Series(
            upper_bound, name=self.series_names, index=self.output_dates_
        )

    return DescribeResult(
        mean=self.mean_, lower=self.lower_, upper=self.upper_
    )
Forecast all the time series, h steps ahead
Parameters:
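h: {integer} Forecasting horizon
level: {integer} Confidence level (in %) of the prediction intervals (default is 95)
**kwargs: additional parameters forwarded to the underlying model's interval computation (e.g. `forecast_interval` or `prediction_intervals`)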
Returns:
model predictions for horizon = h: {array-like}
def score(self, X, training_index, testing_index, scoring=None, **kwargs):
    """Train on training_index, score on testing_index.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features] or [n_samples]
            Time series (most recent observations last)

        training_index: sequence of int
            Row positions used for fitting

        testing_index: sequence of int
            Row positions used for scoring; must not overlap
            `training_index`

        scoring: str
            One of 'explained_variance', 'neg_mean_absolute_error',
            'neg_mean_squared_error', 'neg_root_mean_squared_error'
            (default), 'neg_mean_squared_log_error',
            'neg_median_absolute_error', 'r2'.
            Note: despite the 'neg_' prefix, errors are returned as
            computed (they are not negated).

        **kwargs: additional parameters passed to both `fit` and `predict`

    Returns:

        The value of the chosen metric on the testing window
    """

    assert (
        bool(set(training_index).intersection(set(testing_index))) is False
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    # metrics delegated to sklearn.metrics, by function name
    sklearn_metrics = {
        "explained_variance": "explained_variance_score",
        "neg_mean_absolute_error": "mean_absolute_error",
        "neg_mean_squared_log_error": "mean_squared_log_error",
        "neg_median_absolute_error": "median_absolute_error",
        "r2": "r2_score",
    }
    own_metrics = ("neg_mean_squared_error", "neg_root_mean_squared_error")

    # check inputs before doing any fitting work
    assert scoring in sklearn_metrics or scoring in own_metrics, (
        "'scoring' should be in ('explained_variance', "
        "'neg_mean_absolute_error', 'neg_mean_squared_error', "
        "'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
        "'neg_median_absolute_error', 'r2')"
    )

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (shape has a single entry)
        n = X.shape[0]
        p = 1

    # Training and testing sets (positional indexing for pandas inputs)
    rows = X.iloc if hasattr(X, "iloc") else X
    if p > 1:
        X_train = rows[training_index, :]
        X_test = rows[testing_index, :]
    else:
        X_train = rows[training_index]
        X_test = rows[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    # Fit and predict
    self.fit(X_train, **kwargs)
    preds = self.predict(h=h, **kwargs)

    # `predict` returns a (mean, lower, upper) namedtuple: only the point
    # forecast must be compared with the held-out observations
    if "mean" in getattr(preds, "_fields", ()):
        preds = preds.mean

    actual = np.asarray(X_test)
    predicted = np.asarray(preds)
    if predicted.shape != actual.shape and predicted.size == actual.size:
        # e.g. (h,) forecasts against an (h, 1) test window
        predicted = predicted.reshape(actual.shape)

    if scoring == "neg_mean_squared_error":
        return np.mean((actual - predicted) ** 2)
    if scoring == "neg_root_mean_squared_error":
        return np.sqrt(np.mean((actual - predicted) ** 2))
    return getattr(skm2, sklearn_metrics[scoring])(actual, predicted)
Train on training_index, score on testing_index.
class CustomClassifier(Custom, ClassifierMixin):
    """Custom Classification model

    Attributes:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        cv_calibration: int, cross-validation generator, or iterable, default=2
            Determines the cross-validation splitting strategy. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        calibration_method: str
            {'sigmoid', 'isotonic'}, default='sigmoid'
            The method to use for calibration. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly

    ```python
    import nnetsauce as ns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_digits
    from time import time

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=123)

    # layer 1 (base layer) ----
    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

    start = time()

    layer1_regr.fit(X_train, y_train)

    # Accuracy in layer 1
    print(layer1_regr.score(X_test, y_test))

    # layer 2 using layer 1 ----
    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                            direct_link=True, bias=True,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer2_regr.fit(X_train, y_train)

    # Accuracy in layer 2
    print(layer2_regr.score(X_test, y_test))

    # layer 3 using layer 2 ----
    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                            direct_link=True, bias=True, dropout=0.7,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer3_regr.fit(X_train, y_train)

    # Accuracy in layer 3
    print(layer3_regr.score(X_test, y_test))

    print(f"Elapsed {time() - start}")
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method

    def __sklearn_clone__(self):
        """Create a clone of the estimator.

        This is required for scikit-learn's calibration system to work properly.
        """
        # Create a new instance with the same parameters
        clone = CustomClassifier(
            obj=self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=self.cv_calibration,
            calibration_method=self.calibration_method,
            seed=self.seed,
            backend=self.backend,
        )
        return clone

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        if len(X.shape) == 1:
            # a single observation: treat it as one row
            X = X.reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Wrap in CalibratedClassifierCV if needed; an object that is
        # already a calibrator (e.g. on a second call to `fit`) is not
        # wrapped again
        if self.cv_calibration is not None and not isinstance(
            self.obj, CalibratedClassifierCV
        ):
            self.obj = CalibratedClassifierCV(
                self.obj, cv=self.cv_calibration, method=self.calibration_method
            )

        fit_params = dict(kwargs)
        if sample_weight is not None:
            # weights must follow the rows kept by cook_training_set
            fit_params["sample_weight"] = sample_weight[
                self.index_row_
            ].ravel()

        self.obj.fit(scaled_Z, output_y, **fit_params)

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, sample_weight=None, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        Raises:

            NotImplementedError: if sample weights are given and
                `self.obj` has no `partial_fit` accepting them
        """

        if len(X.shape) == 1:
            # a single observation: one row, one integer label
            X = X.reshape(1, -1)
            y = np.array([y], dtype=np.int_)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if sample_weight is not None:
            try:
                # weights must follow the rows kept by cook_training_set
                self.obj.partial_fit(
                    scaled_Z,
                    output_y,
                    sample_weight=sample_weight[self.index_row_].ravel(),
                )
            except (AttributeError, TypeError) as err:
                raise NotImplementedError(
                    "self.obj does not support partial_fit with sample_weight"
                ) from err
            return self

        self.obj.partial_fit(scaled_Z, output_y)

        return self

    def _call_on_cooked(self, method_name, X, **kwargs):
        """Apply `self.obj.<method_name>` to the transformed version of X."""
        method = getattr(self.obj, method_name)

        if len(X.shape) == 1:
            # single observation: stack it with a row of ones so that the
            # input is 2-D, and keep the result for the first row only
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            return method(self.cook_test_set(new_X, **kwargs), **kwargs)[0]

        return method(self.cook_test_set(X, **kwargs), **kwargs)

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
        """
        return self._call_on_cooked("predict", X, **kwargs)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
        """
        return self._call_on_cooked("predict_proba", X, **kwargs)

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.ndim == 1:
                # single observation: one probability per class
                return proba[1] if proba.shape[0] == 2 else proba
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        return self._call_on_cooked("decision_function", X, **kwargs)

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not a supported metric name
        """

        if scoring is None:
            scoring = "accuracy"

        # metrics computed on predicted labels (sklearn.metrics names)
        label_metrics = {
            "accuracy": "accuracy_score",
            "f1": "f1_score",
            "precision": "precision_score",
            "recall": "recall_score",
            "roc_auc": "roc_auc_score",
            "balanced_accuracy": "balanced_accuracy_score",
            "average_precision": "average_precision_score",
        }
        # metrics computed on predicted probabilities: (name, negate)
        proba_metrics = {
            "log_loss": ("log_loss", False),
            "neg_brier_score": ("brier_score_loss", True),
            "neg_log_loss": ("log_loss", True),
        }

        if scoring in label_metrics:
            metric = getattr(skm2, label_metrics[scoring])
            return metric(y, self.predict(X))

        if scoring in proba_metrics:
            metric_name, negate = proba_metrics[scoring]
            value = getattr(skm2, metric_name)(y, self.predict_proba(X))
            return -value if negate else value

        raise ValueError(
            f"'scoring' must be one of "
            f"{sorted(list(label_metrics) + list(proba_metrics))}, "
            f"got {scoring!r}"
        )

    @property
    def _estimator_type(self):
        return "classifier"
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{‘sigmoid’, ‘isotonic’}, default=’sigmoid’
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    if len(X.shape) == 1:
        # a single observation: treat it as one row
        X = X.reshape(1, -1)

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    # Wrap in CalibratedClassifierCV if needed; an object that is already
    # a calibrator (e.g. on a second call to `fit`) is not wrapped again
    if self.cv_calibration is not None and not isinstance(
        self.obj, CalibratedClassifierCV
    ):
        self.obj = CalibratedClassifierCV(
            self.obj, cv=self.cv_calibration, method=self.calibration_method
        )

    fit_params = dict(kwargs)
    if sample_weight is not None:
        # weights must follow the rows kept by cook_training_set
        fit_params["sample_weight"] = sample_weight[self.index_row_].ravel()

    self.obj.fit(scaled_Z, output_y, **fit_params)

    # recorded whether or not sample weights were given
    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set (and to self.obj.predict)

    Returns:

        model predictions: {array-like}
    """

    if len(X.shape) != 1:
        # regular case: a 2-D batch of observations
        cooked = self.cook_test_set(X, **kwargs)
        return self.obj.predict(cooked, **kwargs)

    # single observation: stack it with a row of ones to get a 2-D input,
    # then keep the prediction for the first row only
    width = X.shape[0]
    padded = mo.rbind(
        X.reshape(1, width),
        np.ones(width).reshape(1, width),
    )
    cooked = self.cook_test_set(padded, **kwargs)
    both_rows = self.obj.predict(cooked, **kwargs)
    return both_rows[0]
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
            self.cook_test_set (they are also forwarded to
            self.obj.predict_proba)

    Returns:

        probability estimates for test data: {array-like}
    """

    # Batch input: transform the features and delegate to the wrapped model.
    if len(X.shape) != 1:
        return self.obj.predict_proba(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    # Single observation: pad with a dummy row of ones, then keep only the
    # probabilities computed for the real (first) row.
    n_features = X.shape[0]
    padded_X = mo.rbind(
        X.reshape(1, n_features),
        np.ones(n_features).reshape(1, n_features),
    )
    cooked = self.cook_test_set(padded_X, **kwargs)
    probabilities = self.obj.predict_proba(cooked, **kwargs)
    return probabilities[0]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy). One of 'accuracy',
            'f1', 'precision', 'recall', 'roc_auc', 'log_loss',
            'balanced_accuracy', 'average_precision',
            'neg_brier_score', 'neg_log_loss'.

    Returns:

        score: float

    Raises:

        ValueError: if `scoring` is not one of the supported names
            (previously the method silently returned None).
    """

    # scoring name -> (function name in sklearn.metrics,
    #                  metric is fed predict_proba output,
    #                  result is negated)
    supported = {
        "accuracy": ("accuracy_score", False, False),
        "f1": ("f1_score", False, False),
        "precision": ("precision_score", False, False),
        "recall": ("recall_score", False, False),
        "roc_auc": ("roc_auc_score", False, False),
        "log_loss": ("log_loss", True, False),
        "balanced_accuracy": ("balanced_accuracy_score", False, False),
        "average_precision": ("average_precision_score", False, False),
        "neg_brier_score": ("brier_score_loss", True, True),
        "neg_log_loss": ("log_loss", True, True),
    }

    if scoring is None:
        scoring = "accuracy"

    if scoring not in supported:
        raise ValueError(
            f"Unsupported scoring: {scoring!r}; "
            f"expected one of {sorted(supported)}"
        )

    metric_name, uses_proba, negate = supported[scoring]
    y_hat = self.predict_proba(X) if uses_proba else self.predict(X)
    value = getattr(skm2, metric_name)(y, y_hat)
    return -value if negate else value
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class CustomRegressor(Custom, RegressorMixin):
    """Custom Regression model

    This class is used to 'augment' any regression model with transformed features.

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        type_pi: str.
            type of prediction interval; currently `None` (split or local
            conformal without simulation), "kde" or "bootstrap" (simulated split
            conformal).

        replications: int.
            number of replications (if needed) for predictive simulation.
            Used only in `self.predict`, for `self.kernel` in ('gaussian',
            'tophat') and `self.type_pi = 'kde'`. Default is `None`.

        kernel: str.
            the kernel to use for kernel density estimation (used for predictive
            simulation in `self.predict`, with `method='splitconformal'` and
            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

        type_split: str.
            Type of splitting for conformal prediction. None (default), or
            "random" (random split of data) or "sequential" (sequential split of data)

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        level: float
            confidence level for prediction intervals

        pi_method: str
            method for prediction intervals: 'splitconformal' or 'localconformal'

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        type_fit: str
            'regression'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        type_pi=None,
        replications=None,
        kernel=None,
        type_split=None,
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method=None,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.type_pi = type_pi
        self.replications = replications
        self.kernel = kernel
        self.type_split = type_split
        self.level = level
        self.pi_method = pi_method
        # Attributes populated by fit / partial_fit.
        self.coef_ = None
        self.intercept_ = None
        self.X_ = None
        self.y_ = None
        self.aic_ = None
        self.aicc_ = None  # declared here; no method in this class assigns it
        self.bic_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights. They are subset with `self.index_row_`
                before being handed to `self.obj.fit`.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        if self.level is not None:
            self.obj = PredictionInterval(
                obj=self.obj, method=self.pi_method, level=self.level
            )

        if sample_weight is not None:
            # Weights must follow the rows kept by cook_training_set
            # (self.index_row_); np.asarray also accepts plain lists.
            self.obj.fit(
                scaled_Z,
                centered_y,
                sample_weight=np.asarray(sample_weight)[
                    self.index_row_
                ].ravel(),
                **kwargs
            )
        else:
            self.obj.fit(scaled_Z, centered_y, **kwargs)

        # Everything below used to be skipped when sample weights were
        # given, which left X_/y_ unset and broke predict(return_pi=True).
        self.X_ = X

        self.y_ = y

        # Compute SSE
        centered_y_pred = self.obj.predict(scaled_Z)
        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

        # Get number of parameters
        n_params = (
            self.n_hidden_features + X.shape[1]
        )  # hidden features + original features
        if self.n_clusters > 0:
            n_params += self.n_clusters  # add clusters if used

        # Compute information criteria
        n_samples = X.shape[0]
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        # A single observation given as a 1-D array is promoted to one row,
        # and its target to a one-element array.
        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y])

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X

        self.y_ = y

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            level: int
                Level of confidence (default = 95)

            method: str
                'splitconformal', 'localconformal'
                prediction (if you specify `return_pi = True`)

            **kwargs: additional parameters
                    `return_pi = True` for conformal prediction,
                    with `method` in ('splitconformal', 'localconformal')
                    or `return_std = True` for `self.obj` in
                    (`sklearn.linear_model.BayesianRidge`,
                     `sklearn.linear_model.ARDRegression`,
                     `sklearn.gaussian_process.GaussianProcessRegressor`)

        Returns:

            model predictions:
                an array if uncertainty quantification is not requested;
                a named tuple (mean, std, lower, upper) if `return_std`
                is passed; or the output of
                `PredictionInterval.predict(X, return_pi=True)` if
                `return_pi` is passed.

        """

        if "return_std" in kwargs:
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)

            if len(X.shape) == 1:
                # Single observation: pad with a dummy row of ones so the
                # model receives a 2-D input.
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )

                mean_, std_ = self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), return_std=True
                )
                # Keep the real observation only. Indexing the returned
                # (mean, std) pair with [0] before unpacking, as was done
                # before, yielded the two entries of the mean vector.
                mean_, std_ = mean_[0], std_[0]
            else:
                mean_, std_ = self.obj.predict(
                    self.cook_test_set(X, **kwargs), return_std=True
                )

            # The model is fitted on the centered target: add the mean back.
            preds = self.y_mean_ + mean_
            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            self.pi = PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            if len(self.X_.shape) == 1:
                # The stored training data is what gets reshaped, so its own
                # type (not the type of the test data X) decides the branch.
                if isinstance(self.X_, pd.DataFrame):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.columns
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            # X_ and y_ are kept after this call: they consume memory, but
            # deleting them would be a dangerous side effect.
            self.pi.fit(self.X_, self.y_)
            preds = self.pi.predict(X, return_pi=True)
            return preds

        # "return_std" not in kwargs
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        # len(X.shape) > 1
        return self.y_mean_ + self.obj.predict(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method; when None, the root mean squared error
                of `self.predict(X)` against `y` is returned

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. They are subset with `self.index_row_`
            before being handed to `self.obj.fit`.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    if self.level is not None:
        self.obj = PredictionInterval(
            obj=self.obj, method=self.pi_method, level=self.level
        )

    if sample_weight is not None:
        # Weights must follow the rows kept by cook_training_set
        # (self.index_row_); np.asarray also accepts plain lists.
        self.obj.fit(
            scaled_Z,
            centered_y,
            sample_weight=np.asarray(sample_weight)[self.index_row_].ravel(),
            **kwargs
        )
    else:
        self.obj.fit(scaled_Z, centered_y, **kwargs)

    # Everything below used to be skipped when sample weights were given,
    # which left X_/y_ unset and broke predict(return_pi=True).
    self.X_ = X

    self.y_ = y

    # Compute SSE
    centered_y_pred = self.obj.predict(scaled_Z)
    self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

    # Get number of parameters
    n_params = (
        self.n_hidden_features + X.shape[1]
    )  # hidden features + original features
    if self.n_clusters > 0:
        n_params += self.n_clusters  # add clusters if used

    # Compute information criteria
    n_samples = X.shape[0]
    temp = n_samples * np.log(self.sse_ / n_samples)
    self.aic_ = temp + 2 * n_params
    self.bic_ = temp + np.log(n_samples) * n_params

    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        level: int
            Level of confidence (default = 95)

        method: str
            'splitconformal', 'localconformal'
            prediction (if you specify `return_pi = True`)

        **kwargs: additional parameters
                `return_pi = True` for conformal prediction,
                with `method` in ('splitconformal', 'localconformal')
                or `return_std = True` for `self.obj` in
                (`sklearn.linear_model.BayesianRidge`,
                 `sklearn.linear_model.ARDRegression`,
                 `sklearn.gaussian_process.GaussianProcessRegressor`)

    Returns:

        model predictions:
            an array if uncertainty quantification is not requested;
            a named tuple (mean, std, lower, upper) if `return_std`
            is passed; or the output of
            `PredictionInterval.predict(X, return_pi=True)` if
            `return_pi` is passed.

    """

    if "return_std" in kwargs:
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)

        if len(X.shape) == 1:
            # Single observation: pad with a dummy row of ones so the
            # model receives a 2-D input.
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            mean_, std_ = self.obj.predict(
                self.cook_test_set(new_X, **kwargs), return_std=True
            )
            # Keep the real observation only. Indexing the returned
            # (mean, std) pair with [0] before unpacking, as was done
            # before, yielded the two entries of the mean vector.
            mean_, std_ = mean_[0], std_[0]
        else:
            mean_, std_ = self.obj.predict(
                self.cook_test_set(X, **kwargs), return_std=True
            )

        # The model is fitted on the centered target: add the mean back.
        preds = self.y_mean_ + mean_
        lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
        upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        return DescribeResults(preds, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        if len(self.X_.shape) == 1:
            # The stored training data is what gets reshaped, so its own
            # type (not the type of the test data X) decides the branch.
            if isinstance(self.X_, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        # X_ and y_ are kept after this call: they consume memory, but
        # deleting them would be a dangerous side effect.
        self.pi.fit(self.X_, self.y_)
        preds = self.pi.predict(X, return_pi=True)
        return preds

    # "return_std" not in kwargs
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return (
            self.y_mean_
            + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
        )[0]

    # len(X.shape) > 1
    return self.y_mean_ + self.obj.predict(
        self.cook_test_set(X, **kwargs), **kwargs
    )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested;
a named tuple (mean, standard deviation, lower and upper
bounds of the prediction interval) if `return_std = True`;
or the output of the conformal `PredictionInterval.predict`
call (prediction intervals) if `return_pi = True`
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method; when None, the root mean squared error
            of `self.predict(X)` against `y` is returned

    Returns:

        score: float

    """

    # A named scorer is resolved through the metrics module's get_scorer.
    if scoring is not None:
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    # Default: root mean squared error.
    residuals = self.predict(X) - y
    mean_squared_error = np.mean(residuals**2)
    return np.sqrt(mean_squared_error)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class CustomBackPropRegressor(Custom, RegressorMixin):
    """
    Finite difference trainer for nnetsauce models.

    Parameters
    ----------

    base_model : object
        The base model, passed as `obj` to `ns.CustomRegressor`
        (e.g., a scikit-learn 'RidgeCV' estimator).

    type_grad : {'finitediff', 'autodiff'}, optional
        Type of gradient computation to use (default='finitediff').
        'autodiff' is not implemented yet and raises NotImplementedError.

    lr : float, optional
        Learning rate for optimization (default=1e-4).

    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

    eps : float, optional
        Scaling factor for adaptive finite difference step size (default=1e-3).

    batch_size : int, optional
        Batch size for 'sgd' optimizer (default=32).

    alpha : float, optional
        Elastic net penalty strength (default=0.0).

    l1_ratio : float, optional
        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

    type_loss : {'mse', 'quantile'}, optional
        Type of loss function to use (default='mse').

    q : float, optional
        Quantile for quantile loss (default=0.5).

    backend : str, optional
        Backend forwarded to `ns.CustomRegressor` (default='cpu').

    **kwargs
        Additional parameters forwarded to the parent constructor and to
        the wrapped `ns.CustomRegressor`.

    """

    def __init__(
        self,
        base_model,
        type_grad="finitediff",
        lr=1e-4,
        optimizer="gd",
        eps=1e-3,
        batch_size=32,
        alpha=0.0,
        l1_ratio=0.0,
        type_loss="mse",
        q=0.5,
        backend="cpu",
        **kwargs,
    ):
        super().__init__(base_model, True, **kwargs)
        self.base_model = base_model
        self.custom_kwargs = kwargs
        self.backend = backend
        self.model = ns.CustomRegressor(
            self.base_model, backend=self.backend, **self.custom_kwargs
        )
        assert isinstance(
            self.model, ns.CustomRegressor
        ), "'model' must be of class ns.CustomRegressor"
        self.type_grad = type_grad
        self.lr = lr
        self.optimizer = optimizer
        self.eps = eps
        self.loss_history_ = []
        self.opt_state = None
        self.batch_size = batch_size  # for SGD
        self._cd_index = 0  # For coordinate descent
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.type_loss = type_loss
        self.q = q

    def _loss(self, X, y, **kwargs):
        """
        Compute the loss (with elastic net penalty) for the current model.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data.

        y : array-like of shape (n_samples,)
            Target values.

        **kwargs
            Additional keyword arguments for loss calculation
            (forwarded to `mean_pinball_loss` for the quantile loss).

        Returns
        -------
        float
            The computed loss value.

        Raises
        ------
        ValueError
            If `self.type_loss` is neither 'mse' nor 'quantile'.
        """
        y_pred = self.model.predict(X)
        if self.type_loss == "mse":
            loss = np.mean((y - y_pred) ** 2)
        elif self.type_loss == "quantile":
            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
        else:
            # Previously fell through and failed on an unbound `loss`.
            raise ValueError(f"Unsupported type_loss: {self.type_loss}")
        W = self.model.W_
        l1 = np.sum(np.abs(W))
        l2 = np.sum(W**2)
        return loss + self.alpha * (
            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
        )

    def _compute_grad(self, X, y):
        """
        Compute the gradient of the loss with respect to W_ using finite differences.

        Central differences are taken on `self._loss`, which already
        contains the elastic net penalty, so the returned gradient covers
        both the data term and the penalty.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data.

        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------

        ndarray
            Gradient array with the same shape as W_.

        Raises
        ------
        NotImplementedError
            If `self.type_grad == 'autodiff'`.
        """
        if self.type_grad == "autodiff":
            raise NotImplementedError(
                "Automatic differentiation is not implemented yet."
            )

        # Finite difference gradient computation
        W = deepcopy(self.model.W_)
        shape = W.shape
        W_flat = W.flatten()
        n_params = W_flat.size

        # Adaptive finite difference step
        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))

        loss_plus = np.zeros(n_params)
        loss_minus = np.zeros(n_params)

        try:
            for i, h_i in enumerate(h_vec):
                Wp = W_flat.copy()
                Wp[i] += h_i
                Wm = W_flat.copy()
                Wm[i] -= h_i

                self.model.W_ = Wp.reshape(shape)
                loss_plus[i] = self._loss(X, y)

                self.model.W_ = Wm.reshape(shape)
                loss_minus[i] = self._loss(X, y)
        finally:
            # restore original weights even if a loss evaluation fails
            self.model.W_ = W

        # The penalty gradient is not added analytically here: it is
        # already part of the differenced loss, and adding it again
        # counted the elastic net term twice.
        return ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)

    def fit(
        self,
        X,
        y,
        epochs=10,
        verbose=True,
        show_progress=True,
        sample_weight=None,
        **kwargs,
    ):
        """
        Fit the model using finite difference optimization.

        The wrapped `ns.CustomRegressor` is fitted once, then its hidden
        layer weights `W_` are updated for `epochs` steps and clipped to
        [0, 1] after every update.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        epochs : int, optional
            Number of optimization steps (default=10).

        verbose : bool, optional
            Whether to print progress messages (default=True).

        show_progress : bool, optional
            Whether to show tqdm progress bar (default=True).

        sample_weight : array-like, optional
            Sample weights, forwarded to the initial fit of the wrapped
            model. The optimized loss itself is not weighted.

        **kwargs
            Additional keyword arguments (currently unused).

        Returns
        -------

        self : object
            Returns self.

        Raises
        ------
        ValueError
            If `self.optimizer` is not one of 'gd', 'sgd', 'adam', 'cd'.
        """

        # Each call trains from a freshly fitted model, so stale optimizer
        # state from a previous call must not leak into this one.
        self.loss_history_ = []
        self.opt_state = None
        self._cd_index = 0

        # The former `if sample_weight in kwargs` test after the loop never
        # succeeded for None and raised TypeError for arrays. The weights
        # are used here instead, so no refit can overwrite the optimized W_.
        # NOTE(review): confirm this is the intended use of the weights.
        if sample_weight is not None:
            self.model.fit(X, y, sample_weight=np.asarray(sample_weight))
        else:
            self.model.fit(X, y)

        iterator = tqdm(range(epochs)) if show_progress else range(epochs)

        for epoch in iterator:
            if self.optimizer == "sgd":
                # Sample a mini-batch for stochastic gradient; the batch
                # cannot exceed the data when sampling without replacement.
                n_samples = X.shape[0]
                idxs = np.random.choice(
                    n_samples, min(self.batch_size, n_samples), replace=False
                )
                if isinstance(X, pd.DataFrame):
                    X_batch = X.iloc[idxs, :]
                else:
                    X_batch = X[idxs, :]
                # positional (not label-based) selection for pandas targets
                if isinstance(y, (pd.Series, pd.DataFrame)):
                    y_batch = y.iloc[idxs]
                else:
                    y_batch = y[idxs]
                grad = self._compute_grad(X_batch, y_batch)
            else:
                grad = self._compute_grad(X, y)

            if self.optimizer in ("gd", "sgd"):
                self.model.W_ -= self.lr * grad
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "adam":
                if self.opt_state is None:
                    self.opt_state = {
                        "m": np.zeros_like(grad),
                        "v": np.zeros_like(grad),
                        "t": 0,
                    }
                beta1, beta2, eps = 0.9, 0.999, 1e-8
                self.opt_state["t"] += 1
                self.opt_state["m"] = (
                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
                )
                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                    1 - beta2
                ) * (grad**2)
                # bias-corrected first and second moment estimates
                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "cd":  # coordinate descent
                W_shape = self.model.W_.shape
                W_flat_size = self.model.W_.size
                W_flat = self.model.W_.flatten()
                grad_flat = grad.flatten()

                # Update only one coordinate per epoch (cyclic)
                idx = self._cd_index % W_flat_size
                W_flat[idx] -= self.lr * grad_flat[idx]
                # Clip the updated value
                W_flat[idx] = np.clip(W_flat[idx], 0, 1)

                # Restore W_
                self.model.W_ = W_flat.reshape(W_shape)

                self._cd_index += 1

            else:
                raise ValueError(f"Unsupported optimizer: {self.optimizer}")

            loss = self._loss(X, y)
            self.loss_history_.append(loss)

            if verbose:
                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """
        Predict using the trained model.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data. A 1-D array is treated as a single observation.

        level : int, optional
            Level of confidence for prediction intervals (default=95).

        method : {'splitconformal', 'localconformal'}, optional
            Method for conformal prediction (default='splitconformal').

        **kwargs
            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
            or `return_std=True` for standard deviation estimates.

        Returns
        -------

        array or tuple
            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
        """
        if "return_std" in kwargs:
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)

            if len(X.shape) == 1:
                # Single observation: pad with a dummy row of ones so the
                # model receives a 2-D input.
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )

                mean_, std_ = self.model.predict(new_X, return_std=True)
                # Keep the real observation only. Indexing the returned
                # (mean, std) pair with [0] before unpacking, as was done
                # before, yielded the two entries of the mean vector.
                mean_, std_ = mean_[0], std_[0]
            else:
                mean_, std_ = self.model.predict(X, return_std=True)

            preds = mean_
            lower = mean_ - pi_multiplier * std_
            upper = mean_ + pi_multiplier * std_

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            # NOTE(review): type_pi, replications, kernel, X_ and y_ are not
            # assigned anywhere in this class; presumably they come from the
            # parent class -- verify before relying on this branch.
            self.pi = ns.PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            if len(self.X_.shape) == 1:
                # The stored training data is what gets reshaped, so its own
                # type (not the type of the test data X) decides the branch.
                if isinstance(self.X_, pd.DataFrame):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.columns
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            # X_ and y_ are kept after this call: they consume memory, but
            # deleting them would be a dangerous side effect.
            self.pi.fit(self.X_, self.y_)
            preds = self.pi.predict(X, return_pi=True)
            return preds

        # "return_std" not in kwargs
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (0 + self.model.predict(new_X, **kwargs))[0]

        # len(X.shape) > 1
        return self.model.predict(X, **kwargs)
Finite difference trainer for nnetsauce models.
Parameters
base_model : str The name of the base model (e.g., 'RidgeCV').
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
def fit(
    self,
    X,
    y,
    epochs=10,
    verbose=True,
    show_progress=True,
    sample_weight=None,
    **kwargs,
):
    """
    Fit the model using finite difference optimization.

    The wrapped model is fitted once, then its weight matrix ``W_`` is
    refined for ``epochs`` steps with the configured optimizer. After
    every step the weights are clipped to ``[0, 1]`` and the current loss
    is appended to ``self.loss_history_``.

    Parameters
    ----------

    X : array-like of shape (n_samples, n_features)
        Training data.

    y : array-like of shape (n_samples,)
        Target values.

    epochs : int, optional
        Number of optimization steps (default=10).

    verbose : bool, optional
        Whether to print progress messages (default=True).

    show_progress : bool, optional
        Whether to show tqdm progress bar (default=True).

    sample_weight : array-like, optional
        Sample weights. When given, the wrapped model is refitted with
        the weights restricted to ``self.index_row_`` once the
        optimization loop has finished.

    **kwargs
        Additional keyword arguments, forwarded to the weighted refit.

    Returns
    -------

    self : object
        Returns self.

    Raises
    ------

    ValueError
        If ``self.optimizer`` is not one of 'gd', 'sgd', 'adam', 'cd'.
    """

    self.model.fit(X, y)

    iterator = tqdm(range(epochs)) if show_progress else range(epochs)

    for epoch in iterator:
        if self.optimizer == "sgd":
            # Sample a mini-batch for the stochastic gradient. The batch is
            # capped at n_samples because sampling without replacement
            # raises when more items are requested than are available.
            n_samples = X.shape[0]
            batch_size = min(self.batch_size, n_samples)
            idxs = np.random.choice(n_samples, batch_size, replace=False)
            if isinstance(X, pd.DataFrame):
                X_batch = X.iloc[idxs, :]
            else:
                X_batch = X[idxs, :]
            # idxs are positions, so pandas targets need positional indexing.
            if isinstance(y, (pd.Series, pd.DataFrame)):
                y_batch = y.iloc[idxs]
            else:
                y_batch = y[idxs]
            # Only the mini-batch gradient is needed here; the full-data
            # gradient is deliberately not computed for this optimizer.
            grad = self._compute_grad(X_batch, y_batch)
        else:
            grad = self._compute_grad(X, y)

        if self.optimizer in ("gd", "sgd"):
            self.model.W_ -= self.lr * grad
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "adam":
            # The moment estimates live on the instance, so they persist
            # across epochs (and across successive calls to fit).
            if self.opt_state is None:
                self.opt_state = {
                    "m": np.zeros_like(grad),
                    "v": np.zeros_like(grad),
                    "t": 0,
                }
            beta1, beta2, eps = 0.9, 0.999, 1e-8
            self.opt_state["t"] += 1
            self.opt_state["m"] = (
                beta1 * self.opt_state["m"] + (1 - beta1) * grad
            )
            self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                1 - beta2
            ) * (grad**2)
            # Bias-corrected first and second moment estimates.
            m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
            v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

            self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "cd":  # coordinate descent
            W_shape = self.model.W_.shape
            W_flat = self.model.W_.flatten()
            grad_flat = grad.flatten()

            # Update only one coordinate per epoch (cyclic)
            idx = self._cd_index % W_flat.size
            W_flat[idx] -= self.lr * grad_flat[idx]
            W_flat[idx] = np.clip(W_flat[idx], 0, 1)

            self.model.W_ = W_flat.reshape(W_shape)
            self._cd_index += 1

        else:
            raise ValueError(f"Unsupported optimizer: {self.optimizer}")

        loss = self._loss(X, y)
        self.loss_history_.append(loss)

        if verbose:
            print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

    # `sample_weight` is a named parameter, so it can never be a key of
    # `kwargs`; test the value itself instead.
    # NOTE(review): this refit runs after the optimization loop and relies
    # on `self.index_row_` being set elsewhere in the class -- confirm that
    # refitting the wrapped model here is the intended behaviour.
    if sample_weight is not None:
        self.model.fit(
            X,
            y,
            sample_weight=np.asarray(sample_weight)[self.index_row_].ravel(),
            **kwargs,
        )

    return self
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """
    Predict using the trained model.

    Parameters
    ----------

    X : array-like of shape (n_samples, n_features)
        Input data. A 1-D array is treated as a single observation.

    level : int, optional
        Level of confidence for prediction intervals (default=95).

    method : {'splitconformal', 'localconformal'}, optional
        Method for conformal prediction (default='splitconformal').

    **kwargs
        Additional keyword arguments. Use `return_pi=True` for prediction intervals,
        or `return_std=True` for standard deviation estimates. Note that
        only the presence of these keys is tested, not their value.

    Returns
    -------

    array or tuple
        Model predictions, or a tuple with prediction intervals or standard deviations if requested.
        With `return_std`, a ``DescribeResults(mean, std, lower, upper)``
        namedtuple is returned.
    """
    if "return_std" in kwargs:
        alpha = 100 - level
        # Two-sided Gaussian multiplier for the requested confidence level.
        pi_multiplier = norm.ppf(1 - alpha / 200)

        if len(X.shape) == 1:
            # Pad the single observation with a dummy row of ones so the
            # wrapped model receives a 2-D input; only row 0 is kept.
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            # Unpack (mean, std) first and then select row 0 of each.
            # Indexing the returned pair with [0] before unpacking would
            # take the two *means* and report the dummy row's prediction
            # as the standard deviation.
            mean_all, std_all = self.model.predict(new_X, return_std=True)
            mean_, std_ = mean_all[0], std_all[0]
        else:
            mean_, std_ = self.model.predict(X, return_std=True)

        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )
        return DescribeResults(
            mean_,
            std_,
            mean_ - pi_multiplier * std_,
            mean_ + pi_multiplier * std_,
        )

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = ns.PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        if len(self.X_.shape) == 1:
            # NOTE(review): the type test looks at `X` while the reshape is
            # applied to the stored `self.X_` -- confirm this is intended.
            if isinstance(X, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        # The stored training data is kept on the instance: it consumes
        # memory, but deleting it here would be a surprising side effect.
        self.pi.fit(self.X_, self.y_)
        return self.pi.predict(X, return_pi=True)

    # Plain point predictions.
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        # Row 0 is the real observation; row 1 is the dummy padding row.
        return (0 + self.model.predict(new_X, **kwargs))[0]

    return self.model.predict(X, **kwargs)
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use return_pi=True for prediction intervals,
or return_std=True for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
class DeepClassifier(CustomClassifier, ClassifierMixin):
    """
    Deep Classifier

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        n_layers: int (default=3)
            Number of layers. `n_layers = 1` is a simple `CustomClassifier`

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        All the other parameters are nnetsauce `CustomClassifier`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegressionCV
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = LogisticRegressionCV()
        clf = ns.DeepClassifier(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```
    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=3,
        verbose=0,
        # CustomClassifier attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        assert n_layers >= 1, "must have n_layers >= 1"

        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        # Stored as parameters only; the layers built in `fit` are created
        # with calibration switched off (cv_calibration=None).
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method
        self.obj = obj
        self.stacked_obj = obj
        self.verbose = verbose
        self.n_layers = n_layers
        self.classes_ = None
        self.n_classes_ = None

    def _new_layer(self, inner):
        """Wrap `inner` in a `CustomClassifier` configured like this estimator."""
        return CustomClassifier(
            obj=inner,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=None,
            calibration_method=None,
            seed=self.seed,
            backend=self.backend,
        )

    def fit(self, X, y, **kwargs):
        """Fit Classification algorithms to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per sample.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        self.classes_ = np.unique(y)
        self.n_classes_ = len(
            self.classes_
        )  # for compatibility with scikit-learn

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Always rebuild the stack from the base learner. Starting from the
        # previous `self.stacked_obj` would nest the already stacked model
        # one level deeper on every call to `fit`, and would ignore a base
        # learner replaced through `set_params(obj=...)`.
        self.stacked_obj = self._new_layer(self.obj)

        extra_layers = range(self.n_layers - 1)
        if self.verbose > 0:
            extra_layers = tqdm(extra_layers)

        for _ in extra_layers:
            self.stacked_obj = deepcopy(self._new_layer(self.stacked_obj))

        self.stacked_obj.fit(X, y, **kwargs)

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the stacked learners with X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per sample.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit`
            method of the wrapped learners. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        # Walk down the stack one wrapped learner at a time; a level that
        # raises ValueError is skipped and the walk stays on that level.
        for _ in range(self.n_layers):
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError:
                pass
        return self

    def predict(self, X):
        """Predict class labels for X with the fitted stacked model."""
        return self.stacked_obj.predict(X)

    def predict_proba(self, X):
        """Predict class probabilities for X with the fitted stacked model."""
        return self.stacked_obj.predict_proba(X)

    def score(self, X, y, scoring=None):
        """Score the fitted stacked model on features X and true labels y."""
        return self.stacked_obj.score(X, y, scoring)

    def cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_obj=None,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=2,
        seed=123,
        **kwargs,
    ):
        """Cross-validation function and hyperparameters' search

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values (class labels).

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values (class labels).

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_obj: an object;
                An ML model for estimating the uncertainty around the objective function

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

            **kwargs: dict
                additional parameters to be passed to the estimator

        Returns:

            The optimizer's result. When both `X_test` and `y_test` are
            given, a namedtuple with the same fields plus
            `test_<scoring>` (the out-of-sample score).
        """

        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
        num_to_type_clust = {1: "kmeans", 2: "gmm"}

        params_names = [
            "n_layers",
            # CustomClassifier attributes
            "n_hidden_features",
            "activation_name",
            "nodes_sim",
            "dropout",
            "n_clusters",
            "type_clust",
        ]

        def pick(mapping, value):
            # Round a continuous coordinate up to a category key. A point
            # sitting exactly on the lower bound (0) would otherwise map to
            # the missing key 0, so the key is clamped to the valid range.
            key = min(max(int(np.ceil(value)), 1), len(mapping))
            return mapping[key]

        def decode(raw):
            # Translate a raw point of the continuous search space into
            # valid estimator parameters. `n_layers` is floored at 1: the
            # constructor asserts n_layers >= 1.
            return {
                "n_layers": max(1, int(np.ceil(raw["n_layers"]))),
                "n_hidden_features": int(np.ceil(raw["n_hidden_features"])),
                "activation_name": pick(
                    num_to_activation_name, raw["activation_name"]
                ),
                "nodes_sim": pick(num_to_nodes_sim, raw["nodes_sim"]),
                "dropout": raw["dropout"],
                "n_clusters": int(np.ceil(raw["n_clusters"])),
                "type_clust": pick(num_to_type_clust, raw["type_clust"]),
            }

        # objective function for hyperparams tuning
        def crossval_objective(xx):
            self.set_params(**{**decode(dict(zip(params_names, xx))), **kwargs})
            # The optimizer minimizes, hence the sign flip on the score.
            return -cross_val_score(
                estimator=self,
                X=X_train,
                y=y_train,
                scoring=scoring,
                cv=cv,
                n_jobs=n_jobs,
                verbose=0,
            ).mean()

        gp_args = {
            "objective_func": crossval_objective,
            "lower_bound": np.array([0, 3, 0, 0, 0.0, 0, 0]),
            "upper_bound": np.array([5, 100, 3, 3, 0.4, 5, 2]),
            "params_names": params_names,
            "n_init": n_init,
            "n_iter": n_iter,
            "seed": seed,
        }
        if surrogate_obj is None:
            gp_args["method"] = "bayesian"
        else:
            gp_args.update(
                acquisition="ucb",
                method="splitconformal",
                surrogate_obj=ns.PredictionInterval(
                    obj=surrogate_obj, method="splitconformal"
                ),
            )
        gp_opt = gp.GPOpt(**gp_args)

        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
        res.best_params.update(decode(res.best_params))

        # out-of-sample error
        if X_test is not None and y_test is not None:
            self.set_params(**res.best_params, verbose=0, seed=seed)
            preds = self.fit(X_train, y_train).predict(X_test)
            # check error on y_test
            oos_err = getattr(metrics, scoring + "_score")(
                y_true=y_test, y_pred=preds
            )
            result = namedtuple("result", res._fields + ("test_" + scoring,))
            return result(*res, oos_err)

        return res

    def lazy_cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_objs=None,
        customize=False,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=1,
        seed=123,
    ):
        """Automated Cross-validation function and hyperparameters' search using multiple surrogates

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values (class labels).

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values (class labels).

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_objs: object names as a list of strings;
                ML models for estimating the uncertainty around the objective function

            customize: boolean
                if True, the surrogate is transformed into a quasi-randomized network (default is False)

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

        Returns:

            A list of `(surrogate name, optimization result)` tuples, one
            per surrogate for which the search succeeded.
        """

        removed_regressors = {
            "TheilSenRegressor",
            "ARDRegression",
            "CCA",
            "GaussianProcessRegressor",
            "GradientBoostingRegressor",
            "HistGradientBoostingRegressor",
            "IsotonicRegression",
            "MultiOutputRegressor",
            "MultiTaskElasticNet",
            "MultiTaskElasticNetCV",
            "MultiTaskLasso",
            "MultiTaskLassoCV",
            "OrthogonalMatchingPursuit",
            "OrthogonalMatchingPursuitCV",
            "PLSCanonical",
            "PLSRegression",
            "RadiusNeighborsRegressor",
            "RegressorChain",
            "StackingRegressor",
            "VotingRegressor",
        }

        results = []

        for name, estimator_class in all_estimators():
            if not issubclass(estimator_class, RegressorMixin):
                continue
            if name in removed_regressors:
                continue
            if surrogate_objs is not None and name not in surrogate_objs:
                continue

            try:
                surr_obj = estimator_class()
                if customize:
                    # NOTE(review): the surrogate is a regressor, yet it is
                    # wrapped in CustomClassifier -- confirm this is intended.
                    surr_obj = ns.CustomClassifier(obj=surr_obj)
                res = self.cross_val_optim(
                    X_train=X_train,
                    y_train=y_train,
                    X_test=X_test,
                    y_test=y_test,
                    surrogate_obj=surr_obj,
                    cv=cv,
                    n_jobs=n_jobs,
                    scoring=scoring,
                    n_init=n_init,
                    n_iter=n_iter,
                    abs_tol=abs_tol,
                    verbose=verbose,
                    seed=seed,
                )
            except Exception:
                # Best effort: a surrogate that cannot be built or fitted is
                # skipped. Unlike a bare `except`, this still lets
                # KeyboardInterrupt and SystemExit stop the search.
                continue

            label = f"CustomClassifier({name})" if customize else name
            results.append((label, res))

        return results

    @property
    def _estimator_type(self):
        return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Fit Classification algorithms to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.
    y : array-like,
        Target values (class labels), one per sample.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the base learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """

    self.classes_ = np.unique(y)
    self.n_classes_ = len(
        self.classes_
    )  # for compatibility with scikit-learn

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    # Always rebuild the stack from the base learner `self.obj`. Starting
    # from the previous `self.stacked_obj` would nest the already stacked
    # model one level deeper on every call to `fit`.
    layer_params = {
        "n_hidden_features": self.n_hidden_features,
        "activation_name": self.activation_name,
        "a": self.a,
        "nodes_sim": self.nodes_sim,
        "bias": self.bias,
        "dropout": self.dropout,
        "direct_link": self.direct_link,
        "n_clusters": self.n_clusters,
        "cluster_encode": self.cluster_encode,
        "type_clust": self.type_clust,
        "type_scaling": self.type_scaling,
        "col_sample": self.col_sample,
        "row_sample": self.row_sample,
        # Calibration is switched off for the individual layers.
        "cv_calibration": None,
        "calibration_method": None,
        "seed": self.seed,
        "backend": self.backend,
    }

    # init layer
    self.stacked_obj = CustomClassifier(obj=self.obj, **layer_params)

    extra_layers = range(self.n_layers - 1)
    if self.verbose > 0:
        extra_layers = tqdm(extra_layers)

    # Each further layer wraps (a copy of) the stack built so far.
    for _ in extra_layers:
        self.stacked_obj = deepcopy(
            CustomClassifier(obj=self.stacked_obj, **layer_params)
        )

    self.stacked_obj.fit(X, y, **kwargs)

    return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values (class labels), with one entry
per training sample.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class DeepRegressor(CustomRegressor, RegressorMixin):
    """
    Deep Regressor

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        n_layers: int (default=2)
            Number of layers. `n_layers = 1` is a simple `CustomRegressor`

        level: int, optional (default=None)
            When not None, the stack is wrapped in a `PredictionInterval`
            built with this level, and `predict` asks it for prediction
            intervals (`return_pi=True`).

        pi_method: str (default="splitconformal")
            `method` handed to `PredictionInterval` when `level` is not None.

        All the other parameters are nnetsauce `CustomRegressor`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import RidgeCV
        data = load_diabetes()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = RidgeCV()
        clf = ns.DeepRegressor(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```

    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=2,
        verbose=0,
        # CustomRegressor attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method="splitconformal",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            level=level,
            pi_method=pi_method,
            seed=seed,
            backend=backend,
        )

        assert n_layers >= 1, "must have n_layers >= 1"

        # Placeholder until `fit` builds the actual stack of layers.
        self.stacked_obj = deepcopy(obj)
        self.verbose = verbose
        self.n_layers = n_layers
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Build the stack of `CustomRegressor` layers and fit it to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features. A numpy array is
            converted to a pandas DataFrame before fitting.
        y : array-like,
            Target values, one per sample (row) of X.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the stacked learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Hyperparameters shared by every layer of the stack.
        layer_params = dict(
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

        # Always restart from a copy of the base learner: wrapping the
        # previous `self.stacked_obj` would make every new call to `fit`
        # nest the model one stack deeper (and wrap an old interval wrapper).
        stacked = CustomRegressor(obj=deepcopy(self.obj), **layer_params)

        extra_layers = range(self.n_layers - 1)
        if self.verbose > 0:
            extra_layers = tqdm(extra_layers)

        for _ in extra_layers:
            stacked = deepcopy(CustomRegressor(obj=stacked, **layer_params))

        # The interval wrapper has to be the object that gets fitted;
        # creating it after fitting would leave it unfitted at predict time.
        if self.level is not None:
            stacked = PredictionInterval(
                obj=stacked, method=self.pi_method, level=self.level
            )

        stacked.fit(X, y, **kwargs)
        self.stacked_obj = stacked

        # Mirror whichever fitted attributes the outermost object exposes.
        for attr in (
            "clustering_obj_",
            "coef_",
            "scaler_",
            "nn_scaler_",
            "clustering_scaler_",
        ):
            if hasattr(self.stacked_obj, attr):
                setattr(self, attr, getattr(self.stacked_obj, attr))

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the stacked learners with X and y.

        Walks down the stack, at most `n_layers` steps: at each step X is
        transformed with `cook_test_set` of the learner wrapped by the
        current layer, and that wrapped learner's `partial_fit` is called.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, one per sample (row) of X.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit`
            method of the wrapped learners. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        # NOTE(review): `stacked_obj` is always set in `__init__`, so this
        # assertion cannot tell a fitted model from an unfitted one.
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        for _ in range(self.n_layers):
            try:
                # NOTE(review): assumes the wrapped learner exposes
                # `cook_test_set` and `partial_fit` -- confirm this holds for
                # the innermost base learner.
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError as e:
                # Best effort: report the problem and retry on the same layer.
                print(e)
        return self

    def predict(self, X, **kwargs):
        """Predict with the fitted stack.

        When `self.level` is not None the stack is asked for prediction
        intervals (`return_pi=True`) and `**kwargs` are not forwarded;
        otherwise `**kwargs` are passed through to the stack's `predict`.
        """
        if self.level is not None:
            return self.stacked_obj.predict(X, return_pi=True)
        return self.stacked_obj.predict(X, **kwargs)

    def score(self, X, y, scoring=None):
        """Return the score computed by the fitted stack for X and y.

        `scoring` is forwarded unchanged to `self.stacked_obj.score`.
        """
        return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Build the stack of `CustomRegressor` layers and fit it to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features. A numpy array is
        converted to a pandas DataFrame before fitting.
    y : array-like,
        Target values, one per sample (row) of X.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the stacked learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    # Hyperparameters shared by every layer of the stack.
    layer_params = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
        backend=self.backend,
    )

    # Always restart from a copy of the base learner: wrapping the
    # previous `self.stacked_obj` would make every new call to `fit`
    # nest the model one stack deeper (and wrap an old interval wrapper).
    stacked = CustomRegressor(obj=deepcopy(self.obj), **layer_params)

    extra_layers = range(self.n_layers - 1)
    if self.verbose > 0:
        extra_layers = tqdm(extra_layers)

    for _ in extra_layers:
        stacked = deepcopy(CustomRegressor(obj=stacked, **layer_params))

    # The interval wrapper has to be the object that gets fitted;
    # creating it after fitting would leave it unfitted at predict time.
    if self.level is not None:
        stacked = PredictionInterval(
            obj=stacked, method=self.pi_method, level=self.level
        )

    stacked.fit(X, y, **kwargs)
    self.stacked_obj = stacked

    # Mirror whichever fitted attributes the outermost object exposes.
    for attr in (
        "clustering_obj_",
        "coef_",
        "scaler_",
        "nn_scaler_",
        "clustering_scaler_",
    ):
        if hasattr(self.stacked_obj, attr):
            setattr(self, attr, getattr(self.stacked_obj, attr))

    return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, one per sample (row) of X.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
def predict(self, X, **kwargs):
    """Predict with the fitted stack.

    Without a confidence level (`self.level is None`) the call is a plain
    pass-through: `**kwargs` are forwarded to `self.stacked_obj.predict`.
    With a confidence level, prediction intervals are requested through
    `return_pi=True` and `**kwargs` are not forwarded.
    """
    if self.level is None:
        return self.stacked_obj.predict(X, **kwargs)
    # Interval mode: only `return_pi=True` is passed on.
    return self.stacked_obj.predict(X, return_pi=True)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegressor`,
`sklearn.gaussian_process.GaussianProcessRegressor`)`
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class DeepMTS(MTS):
    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

    Parameters:

        obj: object.
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict()).

        n_layers: int.
            number of layers in the neural network.

        n_hidden_features: int.
            number of nodes in the hidden layer.

        activation_name: str.
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

        a: float.
            hyperparameter for 'prelu' or 'elu' activation function.

        nodes_sim: str.
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'.

        bias: boolean.
            indicates if the hidden layer contains a bias term (True) or not
            (False).

        dropout: float.
            regularization parameter; (random) percentage of nodes dropped out
            of the training.

        direct_link: boolean.
            indicates if the original predictors are included (True) in model's fitting or not (False).

        n_clusters: int.
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

        cluster_encode: bool.
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding.

        type_clust: str.
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm').

        type_scaling: a tuple of 3 strings.
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax').

        lags: int.
            number of lags used for each time series.

        type_pi: str.
            type of prediction interval; currently:
            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
            - "kde": based on Kernel Density Estimation of in-sample residuals
            - "bootstrap": based on independent bootstrap of in-sample residuals
            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

        block_size: int.
            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
            Default is round(3.15*(n_residuals^(1/3)))

        replications: int.
            number of replications (if needed, for predictive simulation). Default is 'None'.

        kernel: str.
            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

        agg: str.
            either "mean" or "median" for simulation of bootstrap aggregating

        seed: int.
            reproducibility seed for nodes_sim=='uniform' or predictive simulation.

        backend: str.
            "cpu" or "gpu" or "tpu".

        verbose: int.
            0: not printing; 1: printing

        show_progress: bool.
            True: progress bar when fitting each series; False: no progress bar when fitting each series

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        y_: {array-like}
            DeepMTS responses (most recent observations first)

        X_: {array-like}
            DeepMTS lags

        xreg_: {array-like}
            external regressors

        y_means_: dict
            a dictionary of each series mean values

        preds_: {array-like}
            successive model predictions

        preds_std_: {array-like}
            standard deviation around the predictions

        return_std_: boolean
            return uncertainty or not (set in predict)

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

    Examples:

    Example 1:

        ```python
        import nnetsauce as ns
        import numpy as np
        from sklearn import linear_model
        np.random.seed(123)

        M = np.random.rand(10, 3)
        M[:,0] = 10*M[:,0]
        M[:,2] = 25*M[:,2]
        print(M)

        # Adjust Bayesian Ridge
        regr4 = linear_model.BayesianRidge()
        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
        obj_DeepMTS.fit(M)
        print(obj_DeepMTS.predict())

        # with credible intervals
        print(obj_DeepMTS.predict(return_std=True, level=80))

        print(obj_DeepMTS.predict(return_std=True, level=95))
        ```

    Example 2:

        ```python
        import nnetsauce as ns
        import numpy as np
        import pandas as pd
        from sklearn import linear_model

        dataset = {
        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
        'series1' : [34, 30, 35.6, 33.3, 38.1],
        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
        df = pd.DataFrame(dataset).set_index('date')
        print(df)

        # Adjust Bayesian Ridge
        regr5 = linear_model.BayesianRidge()
        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
        obj_DeepMTS.fit(df)
        print(obj_DeepMTS.predict())

        # with credible intervals
        print(obj_DeepMTS.predict(return_std=True, level=80))

        print(obj_DeepMTS.predict(return_std=True, level=95))
        ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_layers=3,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=1,
        type_pi="kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        verbose=0,
        show_progress=True,
    ):
        assert int(lags) == lags, "parameter 'lags' should be an integer"
        assert n_layers >= 1, "must have n_layers >= 1"
        self.n_layers = int(n_layers)

        # Settings used both by the inner `CustomRegressor` wrappers and by
        # the parent `MTS` constructor.
        network_params = dict(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        # Wrap the base learner `n_layers - 1` times; with a single layer the
        # range is empty and the learner is used as supplied.
        learner = obj
        for _ in range(self.n_layers - 1):
            learner = CustomRegressor(obj=deepcopy(learner), **network_params)

        self.obj = deepcopy(learner)
        super().__init__(
            obj=self.obj,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            verbose=verbose,
            show_progress=show_progress,
            **network_params,
        )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^(1/3)))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
class Downloader:
    """Download datasets from data sources (R-universe for now)"""

    def __init__(self):
        # Each field records the most recent `download` call; None until then.
        self.pkgname = None
        self.dataset = None
        self.source = None
        self.url = None
        self.request = None

    @staticmethod
    def _build_url(source, pkgname, dataset):
        """Return the JSON endpoint `<source>/<pkgname>/data/<dataset>/json`.

        Trailing slashes on `source` are normalized so that a base URL
        given without one does not get glued to the package name.
        """
        return source.rstrip("/") + "/" + pkgname + "/data/" + dataset + "/json"

    def download(
        self,
        pkgname="MASS",
        dataset="Boston",
        source="https://cran.r-universe.dev/",
        **kwargs
    ):
        """Download datasets from data sources (R-universe for now)

        Parameters:

            pkgname: str
                name of the package that ships the dataset

            dataset: str
                name of the dataset inside `pkgname`

            source: str
                base URL of the data source, with or without a trailing slash

            **kwargs: dict
                additional parameters passed to `pd.DataFrame`

        Returns:

            a pandas DataFrame built from the JSON payload

        Raises:

            requests.HTTPError: if the server answers with an error status

        Examples:

        ```python
        import nnetsauce as ns

        downloader = ns.Downloader()
        df = downloader.download(pkgname="MASS", dataset="Boston")
        ```

        """
        self.pkgname = pkgname
        self.dataset = dataset
        self.source = source
        self.url = self._build_url(source, pkgname, dataset)
        self.request = requests.get(self.url)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        self.request.raise_for_status()
        return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
def download(
    self,
    pkgname="MASS",
    dataset="Boston",
    source="https://cran.r-universe.dev/",
    **kwargs
):
    """Download datasets from data sources (R-universe for now)

    Parameters:

        pkgname: str
            name of the package that ships the dataset

        dataset: str
            name of the dataset inside `pkgname`

        source: str
            base URL of the data source, with or without a trailing slash

        **kwargs: dict
            additional parameters passed to `pd.DataFrame`

    Returns:

        a pandas DataFrame built from the JSON payload

    Raises:

        requests.HTTPError: if the server answers with an error status

    Examples:

    ```python
    import nnetsauce as ns

    downloader = ns.Downloader()
    df = downloader.download(pkgname="MASS", dataset="Boston")
    ```

    """
    self.pkgname = pkgname
    self.dataset = dataset
    self.source = source
    # Normalize the trailing slash so a base URL given without one does not
    # get glued to the package name.
    self.url = source.rstrip("/") + "/" + pkgname + "/data/" + dataset + "/json"
    self.request = requests.get(self.url)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    self.request.raise_for_status()
    return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class GLMClassifier(GLM, ClassifierMixin):
    """Generalized 'linear' models using quasi-randomized networks (classification)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls the compromise between the l1 and l2 norms of the GLM
            coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls the compromise between the l1 and l2 norms of the GLM
            coefficients on nonlinear features

        family: str
            "logit", "expit" or "erf"; selects the loss used in `fit` and the
            transformation used in `predict_proba`

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer

        backend: str
            "cpu" or "gpu" or "tpu".

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)

    """

    # construct the object -----
    # Shadowed by the `_estimator_type` property defined at the end of the class.
    _estimator_type = "classifier"

    # NOTE(review): `optimizer=Optimizer()` is evaluated once, when the class is
    # defined, so every instance built with the default shares one Optimizer
    # object -- confirm this is intended.
    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="expit",
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        backend="cpu",
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family

    def logit_loss(self, Y, row_index, XB):
        """Loss for family="logit": -mean(sum(Y * XB, axis=1) - logsumexp(XB)).

        `XB` is clipped in place at 709.0 first. `Y` is the one-hot response;
        `row_index` selects the rows of `Y` matching `XB` (None: all rows).
        """
        self.n_classes = Y.shape[1]

        # Cap the linear predictor, in place, at 709.0.
        XB[XB > 709.0] = 709.0

        # logsumexp is called without an axis argument.
        if row_index is None:
            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))

        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))

    def expit_erf_loss(self, Y, row_index, XB):
        """Loss for family="expit" or "erf": same formula as `logit_loss`, without clipping."""
        self.n_classes = Y.shape[1]

        if row_index is None:
            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))

        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))

    def loss_func(
        self,
        beta,
        group_index,
        X,
        Y,
        y,
        row_index=None,
        type_loss="logit",
        **kwargs
    ):
        """Penalized objective handed to the optimizer.

        `beta` is the flat coefficient vector; it is reshaped column-major to
        (X.shape[1], n_classes). The loss named by `type_loss` is evaluated on
        the rows in `row_index` (None: all rows) and the penalty from
        `self.compute_penalty` is added.
        """
        res = {
            "logit": self.logit_loss,
            "expit": self.expit_erf_loss,
            "erf": self.expit_erf_loss,
        }

        coefs = np.reshape(beta, (X.shape[1], self.n_classes), order="F")

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=coefs)
        else:
            XB = self.compute_XB(X, beta=coefs, row_index=row_index)

        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(
            y
        ), "y must contain only integers"  # change is_factor and subsampling everywhere

        labels = np.unique(y)
        self.classes_ = labels  # for compatibility with sklearn
        self.n_classes_ = len(labels)  # for compatibility with sklearn
        self.n_classes = len(labels)

        self.beta_ = None

        n_samples, n_features = X.shape

        # NOTE(review): the regressor uses n_features alone as group_index;
        # confirm that n_samples * n_features is intended here.
        self.group_index = n_samples * n_features

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

        # initialization: least-squares solution used as the optimizer's start
        if self.backend == "cpu":
            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
        else:
            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]

        # optimization: column-major flattening matches the reshape in loss_func
        self.optimizer.fit(
            self.loss_func,
            response=y,
            x0=beta_.flatten(order="F"),
            group_index=self.group_index,
            X=scaled_Z,
            Y=Y,
            y=y,
            type_loss=self.family,
        )

        self.beta_ = self.optimizer.results[0]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                column index of the largest probability in each row

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                one row per observation (a single row for 1-D input)

        Raises:

            ValueError: if `self.family` is not "logit", "expit" or "erf"

        """
        single_obs = len(X.shape) == 1

        if single_obs:
            n_features = X.shape[0]
            # A row of ones is appended below the observation before
            # preprocessing; it is dropped again before returning.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            Z = self.cook_test_set(new_X, **kwargs)
        else:
            Z = self.cook_test_set(X, **kwargs)

        # Let reshape infer the number of coefficients per class: this avoids
        # reading X.shape[1] (undefined for 1-D X) and does not assume a
        # particular layout of the transformed features.
        ZB = mo.safe_sparse_dot(Z, self.beta_.reshape(self.n_classes, -1).T)

        if self.family == "logit":
            scores = np.exp(ZB)
        elif self.family == "expit":
            scores = expit(ZB)
        elif self.family == "erf":
            scores = 0.5 * (1 + erf(ZB))
        else:
            raise ValueError(
                "family must be 'logit', 'expit' or 'erf', got "
                + repr(self.family)
            )

        # Normalize each row so that the class scores sum to one.
        probs = scores / scores.sum(axis=1)[:, None]

        return probs[:1] if single_obs else probs

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not one of the supported methods
        """

        if scoring is None:
            scoring = "accuracy"

        # Metrics computed on self.predict(X): scoring name -> skm2 function name.
        on_labels = {
            "accuracy": "accuracy_score",
            "f1": "f1_score",
            "precision": "precision_score",
            "recall": "recall_score",
            "roc_auc": "roc_auc_score",
            "balanced_accuracy": "balanced_accuracy_score",
            "average_precision": "average_precision_score",
        }
        # Metrics computed on self.predict_proba(X): name -> (function name, sign).
        on_probs = {
            "log_loss": ("log_loss", 1),
            "neg_brier_score": ("brier_score_loss", -1),
            "neg_log_loss": ("log_loss", -1),
        }

        if scoring in on_labels:
            metric = getattr(skm2, on_labels[scoring])
            return metric(y, self.predict(X))

        if scoring in on_probs:
            metric_name, sign = on_probs[scoring]
            return sign * getattr(skm2, metric_name)(y, self.predict_proba(X))

        raise ValueError(
            "unsupported scoring method "
            + repr(scoring)
            + "; expected one of "
            + ", ".join(sorted(list(on_labels) + list(on_probs)))
        )

    @property
    def _estimator_type(self):
        return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
backend: str
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(
        y
    ), "y must contain only integers"  # change is_factor and subsampling everywhere

    labels = np.unique(y)
    self.classes_ = labels  # for compatibility with sklearn
    self.n_classes_ = len(labels)  # for compatibility with sklearn
    self.n_classes = len(labels)

    self.beta_ = None

    n_samples, n_features = X.shape

    # NOTE(review): product of both dimensions of X -- confirm this is the
    # intended group_index.
    self.group_index = n_samples * n_features

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

    # initialization: least-squares solution used as the optimizer's start
    solver = np.linalg.lstsq if self.backend == "cpu" else jnp.linalg.lstsq
    beta_ = solver(scaled_Z, Y, rcond=None)[0]

    # optimization: the start value is flattened column-major
    self.optimizer.fit(
        self.loss_func,
        response=y,
        x0=beta_.flatten(order="F"),
        group_index=self.group_index,
        X=scaled_Z,
        Y=Y,
        y=y,
        type_loss=self.family,
    )

    self.beta_ = self.optimizer.results[0]

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            column index of the largest probability in each row

    """
    probabilities = self.predict_proba(X, **kwargs)
    # Pick, for every row, the column holding the highest probability.
    return np.argmax(probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            one row per observation (a single row for 1-D input)

    Raises:

        ValueError: if `self.family` is not "logit", "expit" or "erf"

    """
    single_obs = len(X.shape) == 1

    if single_obs:
        n_features = X.shape[0]
        # A row of ones is appended below the observation before
        # preprocessing; it is dropped again before returning.
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        Z = self.cook_test_set(new_X, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    # Let reshape infer the number of coefficients per class: this avoids
    # reading X.shape[1] (undefined for 1-D X) and does not assume a
    # particular layout of the transformed features.
    ZB = mo.safe_sparse_dot(Z, self.beta_.reshape(self.n_classes, -1).T)

    if self.family == "logit":
        scores = np.exp(ZB)
    elif self.family == "expit":
        scores = expit(ZB)
    elif self.family == "erf":
        scores = 0.5 * (1 + erf(ZB))
    else:
        raise ValueError(
            "family must be 'logit', 'expit' or 'erf', got "
            + repr(self.family)
        )

    # Normalize each row so that the class scores sum to one.
    probs = scores / scores.sum(axis=1)[:, None]

    return probs[:1] if single_obs else probs
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy)

    Returns:

        score: float

    Raises:

        ValueError: if `scoring` is not one of the supported methods
    """

    if scoring is None:
        scoring = "accuracy"

    # Metrics computed on self.predict(X): scoring name -> skm2 function name.
    on_labels = {
        "accuracy": "accuracy_score",
        "f1": "f1_score",
        "precision": "precision_score",
        "recall": "recall_score",
        "roc_auc": "roc_auc_score",
        "balanced_accuracy": "balanced_accuracy_score",
        "average_precision": "average_precision_score",
    }
    # Metrics computed on self.predict_proba(X): name -> (function name, sign).
    on_probs = {
        "log_loss": ("log_loss", 1),
        "neg_brier_score": ("brier_score_loss", -1),
        "neg_log_loss": ("log_loss", -1),
    }

    if scoring in on_labels:
        metric = getattr(skm2, on_labels[scoring])
        return metric(y, self.predict(X))

    if scoring in on_probs:
        metric_name, sign = on_probs[scoring]
        return sign * getattr(skm2, metric_name)(y, self.predict_proba(X))

    # Previously an unknown name fell through and returned None silently.
    raise ValueError(
        "unsupported scoring method "
        + repr(scoring)
        + "; expected one of "
        + ", ".join(sorted(list(on_labels) + list(on_probs)))
    )
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class GLMRegressor(GLM, RegressorMixin):
    """Generalized 'linear' models using quasi-randomized networks (regression)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls the compromise between the l1 and l2 norms of the GLM
            coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls the compromise between the l1 and l2 norms of the GLM
            coefficients on nonlinear features

        family: str
            "gaussian", "laplace", "poisson", or "quantile" (for now)

        level: int, default=50
            The level of the quantiles to compute for family = "quantile".
            Default is the median.

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer

        backend: str
            "cpu" or "gpu" or "tpu".

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)

    """

    # construct the object -----

    # NOTE(review): `optimizer=Optimizer()` is evaluated once, when the class is
    # defined, so every instance built with the default shares one Optimizer
    # object -- confirm this is intended.
    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="gaussian",
        level=50,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        backend="cpu",
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family
        self.level = level
        # Quantile level as a fraction; recomputed in `fit` from `level`.
        self.q = self.level / 100

    def gaussian_loss(self, y, row_index, XB):
        """Half the mean squared difference between y[row_index] and XB."""
        return 0.5 * np.mean(np.square(y[row_index] - XB))

    def laplace_loss(self, y, row_index, XB):
        """Half the mean absolute difference between y[row_index] and XB."""
        return 0.5 * np.mean(np.abs(y[row_index] - XB))

    def poisson_loss(self, y, row_index, XB):
        """Loss for family="poisson": -mean(y[row_index] * XB - exp(XB))."""
        return -np.mean(y[row_index] * XB - np.exp(XB))

    def pinball_loss(self, y, row_index, XB, tau=0.5):
        """Mean pinball loss of XB against y[row_index] at quantile level `tau`."""
        y = np.array(y[row_index])
        y_pred = np.array(XB)
        return mean_pinball_loss(y, y_pred, alpha=tau)

    def loss_func(
        self,
        beta,
        group_index,
        X,
        y,
        row_index=None,
        type_loss="gaussian",
        **kwargs
    ):
        """Objective handed to the optimizer.

        Evaluates the loss named by `type_loss` on the rows in `row_index`
        (None: all rows). The penalty from `self.compute_penalty` is added for
        every family except "quantile", which returns the pinball loss alone.
        """
        res = {
            "gaussian": self.gaussian_loss,
            "laplace": self.laplace_loss,
            "poisson": self.poisson_loss,
            "quantile": self.pinball_loss,
        }

        is_quantile = type_loss == "quantile"

        if is_quantile:
            assert (
                self.q > 0 and self.q < 1
            ), "'tau' must be comprised 0 < tau < 1"

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=beta)
        else:
            XB = self.compute_XB(X, beta=beta, row_index=row_index)

        if is_quantile:
            return res[type_loss](y, row_index, XB, self.q)

        return res[type_loss](y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """
        self.beta_ = None
        self.n_iter = 0

        # Re-derive the quantile fraction so that a `level` changed after
        # construction (e.g. through set_params) is the one actually used.
        self.q = self.level / 100

        _, self.group_index = X.shape

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # initialization: least-squares solution used as the optimizer's start
        if self.backend == "cpu":
            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
        else:
            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

        # optimization
        self.optimizer.fit(
            self.loss_func,
            response=centered_y,
            x0=beta_,
            group_index=self.group_index,
            X=scaled_Z,
            y=centered_y,
            type_loss=self.family,
            **kwargs
        )

        self.beta_ = self.optimizer.results[0]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # A row of ones is appended below the observation before
            # preprocessing; only the first prediction is returned.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
            )[0]

        return self.y_mean_ + np.dot(
            self.cook_test_set(X, **kwargs), self.beta_
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method; when None, the root mean squared error is
                returned

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on nonlinear features
family: str
"gaussian", "laplace", "poisson", or "quantile" (for now)
level: int, default=50
The level of the quantiles to compute for family = "quantile".
Default is the median.
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
backend: str
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu", "gpu", "tpu"
Attributes:
beta_: vector
regression coefficients
Examples:
See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    self.beta_ = None
    self.n_iter = 0

    # Re-derive the quantile fraction so that a `level` changed after
    # construction (e.g. through set_params) is the one actually used.
    self.q = self.level / 100

    _, self.group_index = X.shape

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # initialization: least-squares solution used as the optimizer's start
    if self.backend == "cpu":
        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
    else:
        beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

    # optimization
    self.optimizer.fit(
        self.loss_func,
        response=centered_y,
        x0=beta_,
        group_index=self.group_index,
        X=scaled_Z,
        y=centered_y,
        type_loss=self.family,
        **kwargs
    )

    self.beta_ = self.optimizer.results[0]

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    is_single_obs = len(X.shape) == 1

    if not is_single_obs:
        features = self.cook_test_set(X, **kwargs)
        return self.y_mean_ + np.dot(features, self.beta_)

    n_features = X.shape[0]
    # A row of ones is appended below the observation before preprocessing;
    # only the first prediction is returned.
    padded = mo.rbind(
        X.reshape(1, n_features),
        np.ones(n_features).reshape(1, n_features),
    )
    features = self.cook_test_set(padded, **kwargs)
    predictions = self.y_mean_ + np.dot(features, self.beta_)
    return predictions[0]
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method; when None, the root mean squared error is
            returned

    Returns:

        score: float

    """
    if scoring is not None:
        # Delegate to the named scorer, called with (estimator, X, y).
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    residuals = self.predict(X) - y
    return np.sqrt(np.mean(residuals**2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class KernelRidge(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression with optional GPU support, Matérn kernels, and
    automatic input standardization.

    Parameters:
    - alpha: float, or list/array of floats
        Regularization parameter. When a list or array is given, `fit` solves
        one system per value and keeps the one with the smallest
        leave-one-out error (stored in `alpha_`).
    - kernel: str
        Kernel type ("linear", "rbf", or "matern").
    - gamma: float
        Kernel coefficient for "rbf". Ignored for other kernels. When None,
        1 / n_features is used; the value actually used is stored in `gamma_`.
    - nu: float
        Smoothness parameter for the Matérn kernel. Default is 1.5.
    - length_scale: float
        Length scale parameter for the Matérn kernel. Default is 1.0.
    - backend: str
        "cpu" or "gpu" (uses JAX if "gpu").
    """

    def __init__(
        self,
        alpha=1.0,
        kernel="rbf",
        gamma=None,
        nu=1.5,
        length_scale=1.0,
        backend="cpu",
    ):
        self.alpha = alpha
        self.alpha_ = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.nu = nu
        self.length_scale = length_scale
        self.backend = backend
        self.scaler = StandardScaler()

        if backend == "gpu" and not JAX_AVAILABLE:
            raise ImportError(
                "JAX is not installed. Please install JAX to use the GPU backend."
            )

    def _linear_kernel(self, X, Y):
        """Return the linear kernel matrix X @ Y.T on the selected backend."""
        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)

    def _rbf_kernel(self, X, Y):
        """
        Return the RBF kernel matrix exp(-gamma * ||x - y||^2).

        The `gamma` hyperparameter is never overwritten: when it is None the
        default 1 / n_features is resolved on every call, so refitting on
        data with a different number of features does not reuse a stale
        value. The resolved value is exposed as `gamma_`.
        """
        gamma = self.gamma if self.gamma is not None else 1.0 / X.shape[1]
        self.gamma_ = gamma
        # `jnp` is only looked up when the GPU backend was requested.
        xp = jnp if self.backend == "gpu" else np
        sq_dists = (
            xp.sum(X**2, axis=1)[:, None]
            + xp.sum(Y**2, axis=1)
            - 2 * xp.dot(X, Y.T)
        )
        return xp.exp(-gamma * sq_dists)

    def _matern_kernel(self, X, Y):
        """
        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.

        Parameters:
        - X: array-like, shape (n_samples_X, n_features)
        - Y: array-like, shape (n_samples_Y, n_features)

        Returns:
        - Kernel matrix, shape (n_samples_X, n_samples_Y)
        """
        # Imported before branching on the backend: a function-level import
        # binds the names as locals for the whole function, so importing in
        # the CPU branch only left them unbound (UnboundLocalError) on the
        # GPU branch.
        from scipy.special import gammaln, kv

        xp = jnp if self.backend == "gpu" else np

        # Pairwise Euclidean distances
        dists = xp.sqrt(xp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
        scaled_dists = xp.sqrt(2 * self.nu) * dists / self.length_scale

        # Matérn kernel formula; the Bessel term is evaluated with SciPy on a
        # NumPy view of the scaled distances for both backends.
        coeff = (2 ** (1 - self.nu)) / xp.exp(gammaln(self.nu))
        bessel = kv(self.nu, np.asarray(scaled_dists))
        matern_kernel = coeff * (scaled_dists**self.nu) * bessel

        # At distance 0 the expression above is 0 * inf; the kernel value
        # there is set to 1.
        return xp.where(dists == 0, 1.0, matern_kernel)

    def _get_kernel(self, X, Y):
        """
        Dispatch to the kernel selected by `self.kernel`.

        Raises:
        - ValueError if `self.kernel` is not "linear", "rbf" or "matern".
        """
        if self.kernel == "linear":
            return self._linear_kernel(X, Y)
        elif self.kernel == "rbf":
            return self._rbf_kernel(X, Y)
        elif self.kernel == "matern":
            return self._matern_kernel(X, Y)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Training data.
        - y: array-like, shape (n_samples,)
            Target values.

        Returns:
        - self: object
            The fitted model.
        """
        # Standardize the inputs
        X = self.scaler.fit_transform(X)
        self.X_fit_ = X

        # Center the response
        self.y_mean_ = np.mean(y)
        y_centered = y - self.y_mean_

        n_samples = X.shape[0]

        # Compute the kernel matrix
        K = self._get_kernel(X, X)
        self.K_ = K
        self.y_fit_ = y_centered

        if isinstance(self.alpha, (list, np.ndarray)):
            # If alpha is a list or array, compute LOOE for each alpha
            self.alphas_ = self.alpha  # Store the list of alphas
            self.dual_coefs_ = []  # Store dual coefficients for each alpha
            self.looe_ = []  # Store LOOE for each alpha

            for alpha in self.alpha:
                G = K + alpha * np.eye(n_samples)
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                dual_coef = np.linalg.solve(G, y_centered)
                # Leave-one-out residuals are dual_coef / diag(G^-1)
                looe = np.sum((dual_coef / diag_G_inv) ** 2)
                self.dual_coefs_.append(dual_coef)
                self.looe_.append(looe)

            # Select the best alpha based on the smallest LOOE
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]
        else:
            # If alpha is a single value, solve the regularized system once
            if self.backend == "gpu":
                self.dual_coef_ = jnp.linalg.solve(
                    K + self.alpha * jnp.eye(n_samples), y_centered
                )
            else:
                self.dual_coef_ = np.linalg.solve(
                    K + self.alpha * np.eye(n_samples), y_centered
                )

        return self

    def predict(self, X, probs=False):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Test data.
        - probs: bool
            When True, return a softmax (axis=1) over the product of the
            predictions with `X_fit_.T` instead of the predictions.

        Returns:
        - Predicted values, shape (n_samples,).
        """
        # Standardize the inputs
        X = self.scaler.transform(X)
        K = self._get_kernel(X, self.X_fit_)
        if self.backend == "gpu":
            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # NOTE(review): the shapes of `preds` and `X_fit_.T` must be
                # compatible for this product -- confirm intended semantics.
                similarities = jnp.dot(preds, self.X_fit_.T)
                # Apply softmax to get probabilities
                return jaxsoftmax(similarities, axis=1)
            return preds
        else:
            preds = np.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # NOTE(review): the shapes of `preds` and `X_fit_.T` must be
                # compatible for this product -- confirm intended semantics.
                similarities = np.dot(preds, self.X_fit_.T)
                # Apply softmax to get probabilities
                return softmax(similarities, axis=1)
            return preds

    def partial_fit(self, X, y):
        """
        Incrementally fit the Kernel Ridge Regression model with new data
        using a recursive approach.

        The first call fits the scaler, stores the batch and its kernel
        matrix, and initializes the dual coefficients to zeros. Later calls
        reuse the fitted scaler and the response mean of the first batch, and
        append one dual coefficient per new sample.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            New training data.
        - y: array-like, shape (n_samples,)
            New target values.

        Returns:
        - self: object
            The updated model.
        """
        # Standardize the inputs
        X = (
            self.scaler.fit_transform(X)
            if not hasattr(self, "X_fit_")
            else self.scaler.transform(X)
        )

        if not hasattr(self, "X_fit_"):
            # Initialize with the first batch of data
            self.X_fit_ = X

            # Center the response
            self.y_mean_ = np.mean(y)
            y_centered = y - self.y_mean_
            self.y_fit_ = y_centered

            n_samples = X.shape[0]

            # Compute the kernel matrix for the initial data
            self.K_ = self._get_kernel(X, X)

            # Initialize dual coefficients for each alpha
            if isinstance(self.alpha, (list, np.ndarray)):
                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
            else:
                self.dual_coef_ = np.zeros(n_samples)
        else:
            # Incrementally update with new data
            y_centered = y - self.y_mean_  # Center the new batch of responses
            for x_new, y_new in zip(X, y_centered):
                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
                k_new = self._get_kernel(self.X_fit_, x_new).flatten()

                # Compute the kernel value for the new data point
                k_self = self._get_kernel(x_new, x_new).item()

                if isinstance(self.alpha, (list, np.ndarray)):
                    # Update dual coefficients for each alpha
                    for idx, alpha in enumerate(self.alpha):
                        gamma_new = 1 / (k_self + alpha)
                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
                        self.dual_coefs_[idx] = np.append(
                            self.dual_coefs_[idx], gamma_new * residual
                        )
                else:
                    # Update dual coefficients for a single alpha
                    gamma_new = 1 / (k_self + self.alpha)
                    residual = y_new - np.dot(self.dual_coef_, k_new)
                    self.dual_coef_ = np.append(
                        self.dual_coef_, gamma_new * residual
                    )

                # Update the kernel matrix
                self.K_ = np.block(
                    [
                        [self.K_, k_new[:, None]],
                        [k_new[None, :], np.array([[k_self]])],
                    ]
                )

                # Update the stored data
                self.X_fit_ = np.vstack([self.X_fit_, x_new])
                self.y_fit_ = np.append(self.y_fit_, y_new)

        # Select the best alpha based on LOOE after the batch
        if isinstance(self.alpha, (list, np.ndarray)):
            self.looe_ = []
            for idx, alpha in enumerate(self.alpha):
                G = self.K_ + alpha * np.eye(self.K_.shape[0])
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
                self.looe_.append(looe)

            # Select the best alpha
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]

        return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float Regularization parameter.
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
def fit(self, X, y):
    """
    Fit the Kernel Ridge Regression model.

    Inputs are standardized with `self.scaler` and the response is centered
    on its mean before the dual problem is solved. When `self.alpha` is a
    list or array, one solution is computed per value and the one with the
    smallest leave-one-out error is kept.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Training data.
    - y: array-like, shape (n_samples,)
        Target values.

    Returns:
    - self: object
        The fitted model.
    """
    # Standardized design matrix, kept for prediction
    X_std = self.scaler.fit_transform(X)
    self.X_fit_ = X_std

    # Centered response
    self.y_mean_ = np.mean(y)
    resid = y - self.y_mean_

    n_obs = X_std.shape[0]

    # Gram matrix of the training data
    gram = self._get_kernel(X_std, X_std)
    self.K_ = gram
    self.y_fit_ = resid

    if not isinstance(self.alpha, (list, np.ndarray)):
        # Single penalty: one linear solve on the selected backend
        if self.backend == "gpu":
            system = gram + self.alpha * jnp.eye(n_obs)
            self.dual_coef_ = jnp.linalg.solve(system, resid)
        else:
            system = gram + self.alpha * np.eye(n_obs)
            self.dual_coef_ = np.linalg.solve(system, resid)
        return self

    # Several penalties: keep every solution and its leave-one-out error
    self.alphas_ = self.alpha
    self.dual_coefs_ = []
    self.looe_ = []

    for penalty in self.alpha:
        system = gram + penalty * np.eye(n_obs)
        inv_diag = np.diag(np.linalg.inv(system))
        coef = np.linalg.solve(system, resid)
        loo_error = np.sum((coef / inv_diag) ** 2)
        self.dual_coefs_.append(coef)
        self.looe_.append(loo_error)

    # Retain the penalty with the smallest leave-one-out error
    winner = np.argmin(self.looe_)
    self.alpha_ = self.alpha[winner]
    self.dual_coef_ = self.dual_coefs_[winner]
    return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
    """
    Predict using the Kernel Ridge Regression model.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Test data.
    - probs: bool
        When True, return a softmax (axis=1) over the product of the
        predictions with `X_fit_.T` instead of the predictions themselves.

    Returns:
    - Predicted values, shape (n_samples,).
    """
    on_gpu = self.backend == "gpu"

    # Scale the inputs with the fitted scaler, then evaluate the kernel
    # against the stored training data
    X_scaled = self.scaler.transform(X)
    gram = self._get_kernel(X_scaled, self.X_fit_)

    dot = jnp.dot if on_gpu else np.dot
    preds = dot(gram, self.dual_coef_) + self.y_mean_
    if not probs:
        return preds

    # NOTE(review): the shapes of `preds` and `X_fit_.T` must be compatible
    # for this product -- confirm the intended meaning of `probs`.
    similarities = dot(preds, self.X_fit_.T)
    if on_gpu:
        return jaxsoftmax(similarities, axis=1)
    return softmax(similarities, axis=1)
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
class LazyClassifier(LazyDeepClassifier):
    """
    Fit -- almost -- all the classification algorithms wrapped in nnetsauce's
    CustomClassifier and return their scores (no layers).

    This is LazyDeepClassifier with `n_layers` fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned
            as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
            identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options of the benchmarking loop itself
        lazy_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded for the CustomClassifier wrapper
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # A single layer is what distinguishes this class from its parent
        super().__init__(n_layers=1, **lazy_options, **custom_options)
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(LazyDeepRegressor):
    """
    Fit -- almost -- all the regression algorithms wrapped in nnetsauce's
    CustomRegressor and return their scores.

    This is LazyDeepRegressor with `n_layers` fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned
            as a dataframe.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
            or a custom metric identified by its name and provided by
            custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                               custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options of the benchmarking loop itself
        lazy_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded for the CustomRegressor wrapper
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # A single layer is what distinguishes this class from its parent
        super().__init__(n_layers=1, **lazy_options, **custom_options)
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(Custom, ClassifierMixin):
    """

    Fitting -- almost -- all the classification algorithms with layers of
    nnetsauce's CustomClassifier and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided. It is called as
            `custom_metric(y_test, y_pred)`.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned by `fit` alongside the scores.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score', 'Custom metric', or
            the custom metric identified by its name (`__name__`) and
            provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' for > 90 classifiers
            (default='all')

        preprocess: bool, preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomClassifiers to be used.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline (None when no model could be
            scored).

    Examples

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
            random_state=123)
        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
            custom_metric=None, predictions=True)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)
        ```

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # Stored as the number of *additional* wrapping layers: the first
        # CustomClassifier layer is always built explicitly in `fit`.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def _layer_params(self):
        """Return the keyword arguments shared by every CustomClassifier layer."""
        return {
            "n_hidden_features": self.n_hidden_features,
            "activation_name": self.activation_name,
            "a": self.a,
            "nodes_sim": self.nodes_sim,
            "bias": self.bias,
            "dropout": self.dropout,
            "direct_link": self.direct_link,
            "n_clusters": self.n_clusters,
            "cluster_encode": self.cluster_encode,
            "type_clust": self.type_clust,
            "type_scaling": self.type_scaling,
            "col_sample": self.col_sample,
            "row_sample": self.row_sample,
            "seed": self.seed,
            "backend": self.backend,
            "cv_calibration": None,
        }

    def _build_deep_classifier(self, name, model, X_train, y_train, prefit):
        """Stack `self.n_layers + 1` CustomClassifier layers around `model`.

        `model` is a zero-argument-constructible estimator factory (a class or
        a `partial`). When `prefit` is True the first layer is fitted on
        (X_train, y_train) before being wrapped, as done when no
        preprocessing pipeline is used.
        """
        supported = model().get_params().keys()
        base_kwargs = {}
        if "random_state" in supported:
            base_kwargs["random_state"] = self.random_state
        # Only forward n_jobs when the user asked for it, so that the default
        # construction of every base model is left untouched.
        if (
            self.n_jobs is not None
            and "n_jobs" in supported
            and "LogisticRegression" not in name
        ):
            base_kwargs["n_jobs"] = self.n_jobs

        layer_params = self._layer_params()
        layer_clf = CustomClassifier(obj=model(**base_kwargs), **layer_params)

        if prefit:
            layer_clf.fit(X_train, y_train)

        for _ in range(self.n_layers):
            layer_clf = deepcopy(
                CustomClassifier(obj=layer_clf, **layer_params)
            )

        return layer_clf

    def _record_scores(self, name, y_test, y_pred, start, results, predictions):
        """Score `y_pred` against `y_test` and append one row to `results`.

        All metrics (including the custom one) are computed before anything
        is appended, so a failing metric can never leave the result columns
        with different lengths.
        """
        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)
        elapsed = time.time() - start

        row = {
            "Model": name,
            "Accuracy": accuracy,
            "Balanced Accuracy": b_accuracy,
            "ROC AUC": roc_auc,
            "F1 Score": f1,
        }
        summary = dict(row)
        if self.custom_metric is not None:
            custom_value = self.custom_metric(y_test, y_pred)
            row["Custom metric"] = custom_value
            summary[self.custom_metric.__name__] = custom_value
        row["Time Taken"] = elapsed

        for column, value in row.items():
            results[column].append(value)

        if self.verbose > 0:
            summary["Time taken"] = time.time() - start
            print(summary)

        if self.predictions:
            predictions[name] = y_pred

    def _report_failure(self, name, exception):
        """Print why `name` was skipped, unless warnings are ignored."""
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit classifiers to X_train and y_train, predict and score on X_test,
        y_test.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per row of X_train.

            y_test: array-like,
                Testing target values, one per row of X_test.

        Returns:

            scores: Pandas DataFrame
                Returns metrics of all the models in a Pandas DataFrame,
                indexed by model name and sorted by `sort_by` (descending).

            predictions: dict
                Only returned (as second element of a tuple) when
                `predictions` is True; maps each model name to its
                predictions on X_test.
        """
        results = {
            "Model": [],
            "Accuracy": [],
            "Balanced Accuracy": [],
            "ROC AUC": [],
            "F1 Score": [],
        }
        if self.custom_metric is not None:
            results["Custom metric"] = []
        results["Time Taken"] = []
        predictions = {}

        # Start from a clean slate so a refit never reports stale models.
        self.models_ = {}
        self.best_model_ = None

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        use_pipeline = self.preprocess is True

        if use_pipeline:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # baseline models (XGBoost is optional)
        try:
            baselines = [
                ("RandomForestClassifier", RandomForestClassifier()),
                ("XGBClassifier", xgb.XGBClassifier()),
            ]
        except Exception:
            baselines = [("RandomForestClassifier", RandomForestClassifier())]

        for name, model in baselines:
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                y_pred = model.predict(X_test)
                self._record_scores(
                    name, y_test, y_pred, start, results, predictions
                )
            except Exception as exception:
                self._report_failure(name, exception)

        if self.estimators == "all":
            self.classifiers = [
                *DEEPCLASSIFIERS,
                *DEEPMULTITASKCLASSIFIERS,
                *DEEPSIMPLEMULTITASKCLASSIFIERS,
            ]
        else:
            selected = [
                est for est in all_estimators() if est[0] in self.estimators
            ]
            self.classifiers = (
                [
                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
                    for est in selected
                    if issubclass(est[1], ClassifierMixin)
                ]
                + [
                    (
                        "DeepMultitaskClassifier(" + est[0] + ")",
                        partial(MultitaskClassifier, obj=est[1]()),
                    )
                    for est in selected
                    if issubclass(est[1], RegressorMixin)
                ]
                + [
                    (
                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                        partial(SimpleMultitaskClassifier, obj=est[1]()),
                    )
                    for est in selected
                    if issubclass(est[1], RegressorMixin)
                ]
            )

        for name, model in tqdm(self.classifiers):  # do parallel exec
            start = time.time()
            try:
                # With a pipeline, the stack must only ever see preprocessed
                # data: pre-fitting the first layer on raw X_train would fail
                # on non-numeric columns, and Pipeline.fit refits it anyway.
                layer_clf = self._build_deep_classifier(
                    name, model, X_train, y_train, prefit=not use_pipeline
                )

                if use_pipeline:
                    fitted = Pipeline(
                        [
                            ("preprocessor", preprocessor),
                            ("classifier", layer_clf),
                        ]
                    )
                else:
                    fitted = layer_clf

                fitted.fit(X_train, y_train)
                self.models_[name] = fitted
                y_pred = fitted.predict(X_test)
                self._record_scores(
                    name, y_test, y_pred, start, results, predictions
                )
            except Exception as exception:
                self._report_failure(name, exception)

        scores = pd.DataFrame(results)

        # The custom metric lives in the "Custom metric" column; also accept
        # the metric's own name, as the class documentation advertises.
        sort_key = self.sort_by
        if (
            sort_key not in results
            and self.custom_metric is not None
            and sort_key == getattr(self.custom_metric, "__name__", None)
        ):
            sort_key = "Custom metric"

        scores = scores.sort_values(by=sort_key, ascending=False).set_index(
            "Model"
        )

        # No row means every model failed: leave best_model_ as None.
        if len(scores.index) > 0:
            self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric.

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """Returns all the model objects trained. If fit hasn't been called yet,
        then it's called to return the models.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per row of X_train.

            y_test: array-like,
                Testing target values, one per row of X_test.

        Returns:

            models: dict-object,
                Returns a dictionary with each model's pipeline as value
                and key = name of the model.
        """
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit classifiers to X_train and y_train, predict and score on X_test,
    y_test.

    Baseline models (a random forest and, if it can be instantiated, an
    XGBoost classifier) are fitted first. Every candidate estimator is then
    wrapped in ``self.n_layers + 1`` nested CustomClassifier layers, fitted
    once (inside a preprocessing pipeline when ``self.preprocess`` is True)
    and scored. A model whose construction, fitting or scoring raises is
    skipped; the error is printed unless ``self.ignore_warnings`` is True.

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training target values (class labels), one per sample.

        y_test: array-like,
            Testing target values (class labels), one per sample.

    Returns:

        scores: Pandas DataFrame
            Metrics of all the models, indexed by model name and sorted
            by ``self.sort_by`` in descending order.

        predictions: dict
            Only returned (as the second element of a tuple) when
            ``self.predictions`` is True. Maps each model name to its
            predictions on X_test.
    """
    names = []
    accuracies = []
    balanced_accuracies = []
    roc_aucs = []
    f1_scores = []
    custom_scores = []
    timings = []
    predictions = {}
    use_custom_metric = self.custom_metric is not None

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    preprocessor = None
    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # Settings shared by every CustomClassifier layer of the stack.
    layer_kwargs = {
        "n_hidden_features": self.n_hidden_features,
        "activation_name": self.activation_name,
        "a": self.a,
        "nodes_sim": self.nodes_sim,
        "bias": self.bias,
        "dropout": self.dropout,
        "direct_link": self.direct_link,
        "n_clusters": self.n_clusters,
        "cluster_encode": self.cluster_encode,
        "type_clust": self.type_clust,
        "type_scaling": self.type_scaling,
        "col_sample": self.col_sample,
        "row_sample": self.row_sample,
        "seed": self.seed,
        "backend": self.backend,
        "cv_calibration": None,
    }

    def build_deep(name, model):
        """Return the unfitted stacked estimator (or pipeline) for `model`."""
        supported = model().get_params().keys()
        base_kwargs = {}
        if "random_state" in supported:
            base_kwargs["random_state"] = self.random_state
        # n_jobs is forwarded only when the user set it explicitly, so the
        # default configuration of every base model is left untouched.
        if (
            self.n_jobs is not None
            and "n_jobs" in supported
            and "LogisticRegression" not in name
        ):
            base_kwargs["n_jobs"] = self.n_jobs

        stacked = CustomClassifier(obj=model(**base_kwargs), **layer_kwargs)
        for _ in range(self.n_layers):
            stacked = deepcopy(CustomClassifier(obj=stacked, **layer_kwargs))

        if preprocessor is None:
            return stacked
        # The stack is fitted only through the pipeline: fitting it on the
        # raw frame first would fail on the non-numeric columns that the
        # preprocessor exists to encode.
        return Pipeline(
            [
                ("preprocessor", preprocessor),
                ("classifier", stacked),
            ]
        )

    def evaluate(name, estimator, start):
        """Score a fitted estimator on the test set and record the results."""
        y_pred = estimator.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)

        # Every metric is computed before anything is appended, so a
        # failing custom metric cannot leave the score lists with
        # different lengths.
        custom_value = None
        if use_custom_metric:
            custom_value = self.custom_metric(y_test, y_pred)
        elapsed = time.time() - start

        names.append(name)
        accuracies.append(accuracy)
        balanced_accuracies.append(b_accuracy)
        roc_aucs.append(roc_auc)
        f1_scores.append(f1)
        timings.append(elapsed)
        if use_custom_metric:
            custom_scores.append(custom_value)

        if self.verbose > 0:
            report = {
                "Model": name,
                "Accuracy": accuracy,
                "Balanced Accuracy": b_accuracy,
                "ROC AUC": roc_auc,
                "F1 Score": f1,
            }
            if use_custom_metric:
                report[self.custom_metric.__name__] = custom_value
            report["Time taken"] = time.time() - start
            print(report)

        if self.predictions:
            predictions[name] = y_pred

    def run(name, make_estimator):
        """Build, fit and score one model; report and skip it on failure."""
        start = time.time()
        try:
            estimator = make_estimator()
            estimator.fit(X_train, y_train)
            self.models_[name] = estimator
            evaluate(name, estimator, start)
        except Exception as exception:
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)

    # baseline models
    try:
        baselines = [
            ("RandomForestClassifier", RandomForestClassifier()),
            ("XGBClassifier", xgb.XGBClassifier()),
        ]
    except Exception:
        # XGBoost could not be instantiated: keep the random forest only.
        baselines = [("RandomForestClassifier", RandomForestClassifier())]

    for name, model in baselines:
        run(name, lambda model=model: model)

    if self.estimators == "all":
        self.classifiers = [
            *DEEPCLASSIFIERS,
            *DEEPMULTITASKCLASSIFIERS,
            *DEEPSIMPLEMULTITASKCLASSIFIERS,
        ]
    else:
        selected = [
            est for est in all_estimators() if est[0] in self.estimators
        ]
        self.classifiers = (
            [
                ("DeepCustomClassifier(" + est[0] + ")", est[1])
                for est in selected
                if issubclass(est[1], ClassifierMixin)
            ]
            + [
                (
                    "DeepMultitaskClassifier(" + est[0] + ")",
                    partial(MultitaskClassifier, obj=est[1]()),
                )
                for est in selected
                if issubclass(est[1], RegressorMixin)
            ]
            + [
                (
                    "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                    partial(SimpleMultitaskClassifier, obj=est[1]()),
                )
                for est in selected
                if issubclass(est[1], RegressorMixin)
            ]
        )

    for name, model in tqdm(self.classifiers):  # do parallel exec
        run(name, partial(build_deep, name, model))

    columns = {
        "Model": names,
        "Accuracy": accuracies,
        "Balanced Accuracy": balanced_accuracies,
        "ROC AUC": roc_aucs,
        "F1 Score": f1_scores,
    }
    if use_custom_metric:
        columns["Custom metric"] = custom_scores
    columns["Time Taken"] = timings

    scores = pd.DataFrame(columns)
    scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
        "Model"
    )

    # When every model failed there is no best model to select.
    if len(scores.index) > 0:
        self.best_model_ = self.models_[scores.index[0]]
    else:
        self.best_model_ = None

    if self.predictions is True:
        return scores, predictions

    return scores
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training target values (class labels),
one per sample.
y_test: array-like,
Testing target values (class labels),
one per sample.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Returned only when predictions=True: maps each model name to its predictions on X_test.
def provide_models(self, X_train, X_test, y_train, y_test):
    """Returns all the model objects trained. If fit hasn't been called yet,
    then it's called to return the models.

    The arguments are only used when no model has been stored yet, in which
    case they are forwarded unchanged to ``fit``.

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training target values, one per sample.

        y_test: array-like,
            Testing target values, one per sample.

    Returns:

        models: dict-object,
            Returns a dictionary with each model's pipeline as value
            and key = name of the model.
    """
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training target values, one per sample.
y_test: array-like, Testing target values, one per sample.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
class LazyDeepRegressor(Custom, RegressorMixin):
    """
    Fitting -- almost -- all the regression algorithms with layers of
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, messages about algorithms that are not able
            to run are suppressed.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned in addition to the scores.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom metric'
            (the column holding the values of custom_metric).
            'R-Squared' and 'Adjusted R-Squared' are sorted from highest
            to lowest; every other column from lowest to highest.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs : int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomRegressors to be used.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False,
                                   custom_metric=None, predictions=True)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # One CustomRegressor always wraps the base model directly; this
        # is the number of additional layers stacked on top of it.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

        Baseline models (a random forest and, if it can be instantiated,
        an XGBoost regressor) are fitted first. Every candidate estimator
        is then wrapped in ``self.n_layers + 1`` nested CustomRegressor
        layers, fitted once (inside a preprocessing pipeline when
        ``self.preprocess`` is True) and scored. A model whose
        construction, fitting or scoring raises is skipped; the error is
        printed unless ``self.ignore_warnings`` is True.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per sample.

            y_test : array-like,
                Testing target values, one per sample.

        Returns:
        -------
        scores: Pandas DataFrame
            Metrics of all the models, indexed by model name and sorted
            by ``self.sort_by`` (best model first for the built-in
            metrics).

        predictions : dict
            Only returned (as the second element of a tuple) when
            ``self.predictions`` is True. Maps each model name to its
            predictions on X_test.

        """
        names = []
        r2_values = []
        adj_r2_values = []
        rmse_values = []
        custom_scores = []
        timings = []
        predictions = {}

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        preprocessor = None
        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # Settings shared by every CustomRegressor layer of the stack.
        layer_kwargs = {
            "n_hidden_features": self.n_hidden_features,
            "activation_name": self.activation_name,
            "a": self.a,
            "nodes_sim": self.nodes_sim,
            "bias": self.bias,
            "dropout": self.dropout,
            "direct_link": self.direct_link,
            "n_clusters": self.n_clusters,
            "cluster_encode": self.cluster_encode,
            "type_clust": self.type_clust,
            "type_scaling": self.type_scaling,
            "col_sample": self.col_sample,
            "row_sample": self.row_sample,
            "seed": self.seed,
            "backend": self.backend,
        }

        def build_deep(model):
            """Return the unfitted stacked estimator (or pipeline) for `model`."""
            supported = model().get_params().keys()
            base_kwargs = {}
            if "random_state" in supported:
                base_kwargs["random_state"] = self.random_state
            # n_jobs is forwarded only when the user set it explicitly, so
            # the default configuration of every base model is untouched.
            if self.n_jobs is not None and "n_jobs" in supported:
                base_kwargs["n_jobs"] = self.n_jobs

            stacked = CustomRegressor(obj=model(**base_kwargs), **layer_kwargs)
            for _ in range(self.n_layers):
                stacked = deepcopy(CustomRegressor(obj=stacked, **layer_kwargs))

            if preprocessor is None:
                return stacked
            # The stack is fitted only through the pipeline: fitting it on
            # the raw frame first would fail on the non-numeric columns
            # that the preprocessor exists to encode.
            return Pipeline(
                steps=[
                    ("preprocessor", preprocessor),
                    ("regressor", stacked),
                ]
            )

        def evaluate(name, estimator, start):
            """Score a fitted estimator on the test set and record the results."""
            y_pred = estimator.predict(X_test)
            r_squared = r2_score(y_test, y_pred)
            adj_rsquared = adjusted_rsquared(
                r_squared, X_test.shape[0], X_test.shape[1]
            )
            rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

            # Every metric is computed before anything is appended, so a
            # failing custom metric cannot leave the score lists with
            # different lengths.
            custom_value = None
            if self.custom_metric:
                custom_value = self.custom_metric(y_test, y_pred)
            elapsed = time.time() - start

            names.append(name)
            r2_values.append(r_squared)
            adj_r2_values.append(adj_rsquared)
            rmse_values.append(rmse)
            timings.append(elapsed)
            if self.custom_metric:
                custom_scores.append(custom_value)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "R-Squared": r_squared,
                    "Adjusted R-Squared": adj_rsquared,
                    "RMSE": rmse,
                    "Time taken": time.time() - start,
                }
                if self.custom_metric:
                    scores_verbose[self.custom_metric.__name__] = custom_value
                print(scores_verbose)

            if self.predictions:
                predictions[name] = y_pred

        def run(name, make_estimator):
            """Build, fit and score one model; report and skip it on failure."""
            start = time.time()
            try:
                estimator = make_estimator()
                estimator.fit(X_train, y_train)
                self.models_[name] = estimator
                evaluate(name, estimator, start)
            except Exception as exception:
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        # base models
        try:
            baselines = [
                ("RandomForestRegressor", RandomForestRegressor()),
                ("XGBRegressor", xgb.XGBRegressor()),
            ]
        except Exception:
            # XGBoost could not be instantiated: keep the random forest only.
            baselines = [("RandomForestRegressor", RandomForestRegressor())]

        for name, model in baselines:
            run(name, lambda model=model: model)

        if self.estimators == "all":
            self.regressors = DEEPREGRESSORS
        else:
            self.regressors = [
                ("DeepCustomRegressor(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]

        for name, model in tqdm(self.regressors):  # do parallel exec
            run(name, lambda model=model: build_deep(model))

        scores = {
            "Model": names,
            "Adjusted R-Squared": adj_r2_values,
            "R-Squared": r2_values,
            "RMSE": rmse_values,
            "Time Taken": timings,
        }

        if self.custom_metric:
            scores["Custom metric"] = custom_scores

        scores = pd.DataFrame(scores)
        # R-squared style metrics are better when larger; sorting them in
        # ascending order would put the worst model first and select it
        # as best_model_.
        lower_is_better = self.sort_by not in ("R-Squared", "Adjusted R-Squared")
        scores = scores.sort_values(
            by=self.sort_by, ascending=lower_is_better
        ).set_index("Model")

        # When every model failed there is no best model to select.
        if len(scores.index) > 0:
            self.best_model_ = self.models_[scores.index[0]]
        else:
            self.best_model_ = None

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric.

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """
        This function returns all the model objects trained in fit function.
        If fit is not called already, then we call fit and then return the models.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per sample.

            y_test : array-like,
                Testing target values, one per sample.

        Returns:

            models: dict-object,
                Returns a dictionary with each model pipeline as value
                with key as name of models.

        """
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, messages about algorithms that are not able to run are suppressed.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned in addition to the scores.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom metric',
the last one being the column that holds the values of the function provided through custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Training target values, one per row of X_train.

        y_test : array-like,
            Testing target values, one per row of X_test.

    Returns:
    -------
        scores: Pandas DataFrame
            Metrics of all the models that ran successfully, indexed by
            model name and sorted by `self.sort_by` (best model first).

        predictions : dict
            Only returned when `self.predictions` is True. Maps each model
            name to its predictions on X_test.

    """
    names = []
    R2 = []
    ADJR2 = []
    RMSE = []
    TIME = []
    CUSTOM_METRIC = []
    predictions = {}

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # Hyperparameters shared by every CustomRegressor layer.
    layer_params = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
        backend=self.backend,
    )

    def make_estimator(model):
        # Build the (unfitted) stack of CustomRegressor layers around `model`,
        # wrapped in a preprocessing Pipeline when requested.
        if "random_state" in model().get_params().keys():
            base = model(random_state=self.random_state)
        else:
            base = model()
        layer_regr = CustomRegressor(obj=base, **layer_params)
        for _ in range(self.n_layers):
            layer_regr = deepcopy(
                CustomRegressor(obj=layer_regr, **layer_params)
            )
        # No intermediate fit on the raw data: the final fit refits every
        # layer, and a fit on unprocessed categorical columns would fail.
        if self.preprocess is True:
            return Pipeline(
                steps=[
                    ("preprocessor", preprocessor),
                    ("regressor", layer_regr),
                ]
            )
        return layer_regr

    def record(name, fitted, start):
        # Score a fitted model on the test set and store its results.
        y_pred = fitted.predict(X_test)
        r_squared = r2_score(y_test, y_pred)
        adj_rsquared = adjusted_rsquared(
            r_squared, X_test.shape[0], X_test.shape[1]
        )
        rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
        elapsed = time.time() - start

        custom_metric = None
        if self.custom_metric:
            custom_metric = self.custom_metric(y_test, y_pred)

        # Append only once every metric has been computed, so that a
        # failing metric cannot leave the result lists with unequal lengths.
        names.append(name)
        R2.append(r_squared)
        ADJR2.append(adj_rsquared)
        RMSE.append(rmse)
        TIME.append(elapsed)
        if self.custom_metric:
            CUSTOM_METRIC.append(custom_metric)

        if self.verbose > 0:
            scores_verbose = {
                "Model": name,
                "R-Squared": r_squared,
                "Adjusted R-Squared": adj_rsquared,
                "RMSE": rmse,
                "Time taken": time.time() - start,
            }
            if self.custom_metric:
                metric_name = getattr(
                    self.custom_metric, "__name__", "Custom metric"
                )
                scores_verbose[metric_name] = custom_metric
            print(scores_verbose)

        if self.predictions:
            predictions[name] = y_pred

    def report_failure(name, exception):
        # Failures are best-effort: report them only when asked to.
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    # base models
    # NOTE(review): the fallback presumably covers a missing xgboost
    # install -- confirm against the module's imports.
    try:
        baseline_names = ["RandomForestRegressor", "XGBRegressor"]
        baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
    except Exception:
        baseline_names = ["RandomForestRegressor"]
        baseline_models = [RandomForestRegressor()]

    for name, model in zip(baseline_names, baseline_models):
        start = time.time()
        try:
            model.fit(X_train, y_train)
            self.models_[name] = model
            record(name, model, start)
        except Exception as exception:
            report_failure(name, exception)

    if self.estimators == "all":
        self.regressors = DEEPREGRESSORS
    else:
        self.regressors = [
            ("DeepCustomRegressor(" + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            estimator = make_estimator(model)
            estimator.fit(X_train, y_train)
            self.models_[name] = estimator
            record(name, estimator, start)
        except Exception as exception:
            report_failure(name, exception)

    scores = {
        "Model": names,
        "Adjusted R-Squared": ADJR2,
        "R-Squared": R2,
        "RMSE": RMSE,
        "Time Taken": TIME,
    }

    if self.custom_metric:
        scores["Custom metric"] = CUSTOM_METRIC

    scores = pd.DataFrame(scores)

    # Accept the documented aliases for the custom metric column.
    sort_key = self.sort_by
    if (
        self.custom_metric
        and sort_key not in scores.columns
        and sort_key
        in ("Custom Metric", getattr(self.custom_metric, "__name__", None))
    ):
        sort_key = "Custom metric"

    # R-squared metrics are "higher is better"; the others are sorted
    # ascending so that the first row is always the best model.
    higher_is_better = sort_key in ("R-Squared", "Adjusted R-Squared")
    scores = scores.sort_values(
        by=sort_key, ascending=not higher_is_better
    ).set_index("Model")

    # No best model when every candidate failed.
    if len(scores.index) > 0:
        self.best_model_ = self.models_[scores.index[0]]
    else:
        self.best_model_ = None

    if self.predictions is True:
        return scores, predictions

    return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values,
one per sample (row) of X_train.
y_test : array-like,
Testing target values,
one per sample (row) of X_test.
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : dict Returns the predictions of all the models, keyed by model name (only returned when predictions=True).
def provide_models(self, X_train, X_test, y_train, y_test):
    """
    Return all the model objects trained by the fit function.

    If no model has been stored yet (`self.models_` is empty), `fit` is
    called first with the data provided, then the models are returned.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Training target values, one per row of X_train.

        y_test : array-like,
            Testing target values, one per row of X_test.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    # An empty registry means fit has not produced any model yet.
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values,
one per sample (row) of X_train.
y_test : array-like,
Testing target values,
one per sample (row) of X_test.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class LazyMTS(LazyDeepMTS):
    """
    Fitting -- almost -- all the regression algorithms to multivariate time series
    and returning their scores (no layers).

    This is LazyDeepMTS with the number of layers fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are also returned.

        sort_by: string, optional (default=None)
            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
            provided by custom_metric. When left to None, the parent class picks
            'WINKLERSCORE' if prediction intervals are requested (replications is not
            None or type_pi == "gaussian") and 'RMSE' otherwise.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators (regression algorithms) names or just 'all' (default='all')

        preprocess: bool, preprocessing is done when set to True

        h: int, optional (default=None)
            Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

        All the other parameters are the same as MTS's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by=None,  # leave it as is
        random_state=42,
        estimators="all",
        preprocess=False,
        h=None,
        # MTS attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=15,
        type_pi="scp2-kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        show_progress=False,
    ):
        # Settings specific to the "lazy" benchmarking layer.
        lazy_settings = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            h=h,
        )
        # Settings forwarded unchanged to the underlying MTS model.
        mts_settings = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            seed=seed,
            backend=backend,
            show_progress=show_progress,
        )
        # A single layer: this class never stacks layers.
        super().__init__(n_layers=1, **lazy_settings, **mts_settings)
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are also returned.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, 
xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = 
self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 
direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 
scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for 
name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 
685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 
scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 
860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 
pass 945 946 if self.predictions is True: 947 return scores, predictions 948 949 return scores 950 951 def get_best_model(self): 952 """ 953 This function returns the best model pipeline based on the sort_by metric. 954 955 Returns: 956 957 best_model: object, 958 Returns the best model pipeline based on the sort_by metric. 959 960 """ 961 return self.best_model_ 962 963 def provide_models(self, X_train, X_test): 964 """ 965 This function returns all the model objects trained in fit function. 966 If fit is not called already, then we call fit and then return the models. 967 968 Parameters: 969 970 X_train : array-like, 971 Training vectors, where rows is the number of samples 972 and columns is the number of features. 973 974 X_test : array-like, 975 Testing vectors, where rows is the number of samples 976 and columns is the number of features. 977 978 Returns: 979 980 models: dict-object, 981 Returns a dictionary with each model pipeline as value 982 with key as name of models. 983 984 """ 985 if self.h is None: 986 if len(self.models_.keys()) == 0: 987 self.fit(X_train, X_test) 988 else: 989 if len(self.models_.keys()) == 0: 990 if isinstance(X_test, pd.DataFrame): 991 self.fit(X_train, X_test.iloc[0: self.h, :]) 992 else: 993 self.fit(X_train, X_test[0: self.h, :]) 994 995 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned alongside the scores.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Must be a column of the scores table built by `fit`: 'RMSE', 'MAE', 'MPL',
'Time Taken', 'WINKLERSCORE' or 'COVERAGE' (the last two only when prediction intervals are computed),
or 'Custom metric' when custom_metric is provided.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to a MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
    """Fit Regression algorithms to X_train, predict and score on X_test.

    Parameters:

        X_train: array-like or data frame,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like or data frame,
            Testing vectors, where rows is the number of samples
            and columns is the number of features. When `self.h` is set,
            only the first `self.h` rows are used for scoring; the
            caller's object is never truncated in place.

        xreg: array-like, optional (default=None)
            Additional (external) regressors to be passed to self.obj
            xreg must be in 'increasing' order (most recent observations last)

        per_series: bool, optional (default=False)
            When set to True, the metrics are computed series by series.

        **kwargs: dict, optional (default=None)
            Additional parameters; they are forwarded to the estimator
            constructor and to the `fit` and `predict` calls.

    Returns:

        scores: Pandas DataFrame
            Returns metrics of all the models in a Pandas DataFrame.

        predictions: dict
            Predictions of each model keyed by model name; only returned
            (as second element of a tuple) when `self.predictions` is True.

    """
    # Prediction intervals are available in these two configurations only.
    with_intervals = (self.replications is not None) or (
        self.type_pi == "gaussian"
    )

    names = []
    RMSE = []
    MAE = []
    MPL = []
    WINKLERSCORE = []
    COVERAGE = []
    TIME = []
    CUSTOM_METRIC = []
    predictions = {}

    if self.h is None:
        assert X_test is not None, "If h is None, X_test must be provided."

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    self.series_names = X_train.columns.tolist()

    X_train = convert_df_to_numeric(X_train)
    X_test = convert_df_to_numeric(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # baselines (Classical MTS) ----
    for name in ("ARIMA", "ETS", "Theta", "VAR", "VECM"):
        start = time.time()
        try:
            regr = ClassicalMTS(model=name)
            regr.fit(X_train, **kwargs)
            self.models_[name] = regr
            horizon, X_eval = self._forecast_horizon(X_test)
            X_pred = regr.predict(h=horizon, **kwargs)
            rmse, mae, mpl = self._point_scores(X_eval, X_pred, per_series)
        except Exception:
            # A baseline that cannot be fitted or scored is simply left out.
            continue

        names.append(name)
        RMSE.append(rmse)
        MAE.append(mae)
        MPL.append(mpl)

        if self.custom_metric is not None:
            CUSTOM_METRIC.append(self._custom_score(X_eval, X_pred))

        if with_intervals:
            WINKLERSCORE.append(
                winkler_score(
                    obj=X_pred,
                    actual=X_eval,
                    level=95,
                    per_series=per_series,
                )
            )
            COVERAGE.append(
                coverage(X_pred, X_eval, level=95, per_series=per_series)
            )
        TIME.append(time.time() - start)

    if self.estimators == "all":
        if self.n_layers <= 1:
            self.regressors = REGRESSORSMTS
        else:
            self.regressors = DEEPREGRESSORSMTS
    else:
        prefix = "MTS(" if self.n_layers <= 1 else "DeepMTS("
        self.regressors = [
            (prefix + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            deep_mts = self._make_deep_mts(model, **kwargs)

            if self.preprocess is True:
                pipe = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", deep_mts),
                    ]
                )
                pipe.fit(X_train, **kwargs)
                forecaster = pipe["regressor"]
            else:
                pipe = deep_mts
                pipe.fit(X_train, xreg, **kwargs)
                forecaster = pipe

            self.models_[name] = pipe

            horizon, X_eval = self._forecast_horizon(X_test)
            X_pred = forecaster.predict(h=horizon, **kwargs)

            # With prediction intervals the forecaster returns a result
            # object; point metrics are computed on its `mean` field.
            point_pred = X_pred.mean if with_intervals else X_pred
            rmse, mae, mpl = self._point_scores(
                X_eval, point_pred, per_series
            )
            if with_intervals:
                winklerscore = winkler_score(
                    obj=X_pred,
                    actual=X_eval,
                    level=95,
                    per_series=per_series,
                )
                coveragecalc = coverage(
                    X_pred, X_eval, level=95, per_series=per_series
                )

            names.append(name)
            RMSE.append(rmse)
            MAE.append(mae)
            MPL.append(mpl)
            if with_intervals:
                WINKLERSCORE.append(winklerscore)
                COVERAGE.append(coveragecalc)
            TIME.append(time.time() - start)

            if self.custom_metric is not None:
                custom_metric = self._custom_score(X_eval, X_pred)
                CUSTOM_METRIC.append(custom_metric)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "RMSE": rmse,
                    "MAE": mae,
                    "MPL": mpl,
                }
                if with_intervals:
                    scores_verbose["WINKLERSCORE"] = winklerscore
                    scores_verbose["COVERAGE"] = coveragecalc
                scores_verbose["Time taken"] = time.time() - start
                if self.custom_metric is not None:
                    scores_verbose["Custom metric"] = custom_metric
                # NOTE(review): the original built this dict and dropped
                # it; printing it is the presumed intent of `verbose`.
                print(scores_verbose)

            if self.predictions:
                predictions[name] = X_pred

        except Exception as exception:
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)

    scores = {
        "Model": names,
        "RMSE": RMSE,
        "MAE": MAE,
        "MPL": MPL,
    }
    if with_intervals:
        scores["WINKLERSCORE"] = WINKLERSCORE
        scores["COVERAGE"] = COVERAGE
    scores["Time Taken"] = TIME

    if self.custom_metric is not None:
        scores["Custom metric"] = CUSTOM_METRIC

    if per_series:
        scores = dict_to_dataframe_series(scores, self.series_names)
    else:
        scores = pd.DataFrame(scores)

    # Best effort: in the per_series case the table can't be sorted, and
    # then `best_model_` is left unset.
    try:
        scores = scores.sort_values(
            by=self.sort_by, ascending=True
        ).set_index("Model")

        self.best_model_ = self.models_[scores.index[0]]
    except Exception:
        pass

    if self.predictions is True:
        return scores, predictions

    return scores


def _forecast_horizon(self, X_test):
    """Return `(h, actuals)`: the forecast horizon and matching test rows."""
    if self.h is None:
        return X_test.shape[0], X_test
    assert (
        self.h > 0 and self.h <= X_test.shape[0]
    ), "h must be > 0 and <= X_test.shape[0]"
    if isinstance(X_test, pd.DataFrame):
        return self.h, X_test.iloc[0: self.h, :]
    return self.h, X_test[0: self.h, :]


def _point_scores(self, actual, pred, per_series):
    """Return the `(rmse, mae, mpl)` point-forecast errors."""
    return tuple(
        mean_errors(
            actual=actual,
            pred=pred,
            scoring=scoring,
            per_series=per_series,
        )
        for scoring in (
            "root_mean_squared_error",
            "mean_absolute_error",
            "mean_pinball_loss",
        )
    )


def _custom_score(self, actual, pred):
    """Evaluate `self.custom_metric`; a failing metric scores as float32 max."""
    try:
        return self.custom_metric(actual, pred)
    except Exception:
        # np.iinfo rejects floating dtypes, so np.finfo is required here.
        return np.finfo(np.float32).max


def _make_deep_mts(self, model, **kwargs):
    """Wrap estimator class `model` in a DeepMTS configured from `self`."""
    if "random_state" in model().get_params().keys():
        base_learner = model(random_state=self.random_state, **kwargs)
    else:
        base_learner = model(**kwargs)
    return DeepMTS(
        obj=base_learner,
        n_layers=self.n_layers,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        lags=self.lags,
        type_pi=self.type_pi,
        block_size=self.block_size,
        replications=self.replications,
        kernel=self.kernel,
        agg=self.agg,
        seed=self.seed,
        backend=self.backend,
        show_progress=self.show_progress,
    )
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like or data frame,
Testing vectors, where rows is the number of samples
and columns is the number of features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Returns the predictions of each model, keyed by model name (only returned when `predictions=True`).
def provide_models(self, X_train, X_test):
    """
    Return every model trained by `fit`, fitting first if none exist yet.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    already_fitted = len(self.models_.keys()) > 0
    if already_fitted:
        return self.models_

    if self.h is None:
        # No fixed horizon: score on the whole test set.
        self.fit(X_train, X_test)
    elif isinstance(X_test, pd.DataFrame):
        # Fixed horizon: keep the first h rows only (positional slice).
        self.fit(X_train, X_test.iloc[0: self.h, :])
    else:
        self.fit(X_train, X_test[0: self.h, :])

    return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class MLARCH:
    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

    Parameters
    ----------
    model_mean : object
        Model for mean component
    model_sigma : object
        Model for volatility component (sklearn regressor)
    model_residuals : object
        Model for standardized residuals
    lags_vol : int, default=10
        Number of lags for squared residuals in volatility model
    """

    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
        self.model_mean = model_mean
        self.model_sigma = model_sigma
        self.model_residuals = model_residuals
        self.lags_vol = lags_vol

    def _create_lags(self, y, lags):
        """Create lagged feature matrix

        Row t holds ``y[t], ..., y[t + lags - 1]``, i.e. the `lags` values
        that precede ``y[t + lags]``.

        Raises
        ------
        ValueError
            If the series is not strictly longer than `lags`.
        """
        n = len(y)
        if n <= lags:
            raise ValueError(f"Series length {n} must be > lags {lags}")
        X = np.zeros((n - lags, lags))
        for i in range(lags):
            X[:, i] = y[i: (n - lags + i)]
        return X

    def fit(self, y, **kwargs):
        """Fit the MLARCH model

        Parameters
        ----------
        y : array-like
            Target time series (should be stationary, e.g., returns).
            Lists, numpy arrays, pandas Series and DataFrames are accepted;
            the input is flattened to one dimension.
        **kwargs
            Accepted for interface compatibility; currently unused.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If fewer than ``lags_vol + 20`` observations are supplied.
        """
        # Format input (np.asarray also covers plain Python sequences)
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values
        y = np.asarray(y).ravel()

        if len(y) < self.lags_vol + 20:
            raise ValueError(f"Need at least {self.lags_vol + 20} observations")

        # Step 1: Fit mean model
        self.model_mean.fit(y.reshape(-1, 1))
        mean_residuals = np.asarray(self.model_mean.residuals_).ravel()

        # Step 2: Fit ARCH volatility model on lagged squared residuals.
        # The target is the log of the squared residual; 1e-8 avoids log(0).
        resid_squared = mean_residuals**2
        X_vol = self._create_lags(resid_squared, self.lags_vol)
        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)

        self.model_sigma.fit(X_vol, y_vol)

        # Fitted values are on the squared-residual scale, hence the sqrt
        # wherever a volatility is needed below.
        fitted_log_sigma = self.model_sigma.predict(X_vol)
        fitted_sigma = np.exp(fitted_log_sigma)
        fitted_vol = np.sqrt(fitted_sigma)

        # Step 3: Compute standardized residuals with proper scaling
        standardized_residuals = mean_residuals[self.lags_vol:] / fitted_vol

        # Enforce zero mean and unit variance. A zero spread (e.g. constant
        # input) would turn the division into 0/0, so fall back to scale 1.
        self.z_mean_ = np.mean(standardized_residuals)
        z_std = np.std(standardized_residuals)
        self.z_std_ = z_std if z_std != 0 else 1.0
        standardized_residuals = (
            standardized_residuals - self.z_mean_
        ) / self.z_std_

        # Step 4: Fit residuals model
        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

        # Store for prediction
        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]

        # Store diagnostics
        self.fitted_volatility_mean_ = np.mean(fitted_vol)
        self.fitted_volatility_std_ = np.std(fitted_vol)

        return self

    def predict(self, h=5, level=95, return_sims=False):
        """Predict future values

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        return_sims : bool
            If True, return full simulation paths (when the residuals
            model exposes them as a ``sims`` DataFrame; otherwise Gaussian
            intervals are used and ``sims`` is None)

        Returns
        -------
        DescribeResult
            Named tuple with mean, sims, lower, upper
        """
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        # Get mean forecast
        mean_forecast = self.model_mean.predict(h=h).values.ravel()

        # Recursive ARCH volatility forecasting
        sigma_forecast = np.zeros(h)
        current_lags = self.last_residuals_squared_.copy()

        for i in range(h):
            X_t = current_lags.reshape(1, -1)
            log_sigma_t = self.model_sigma.predict(X_t)[0]
            sigma_forecast[i] = np.exp(log_sigma_t)
            # Update lags with predicted variance
            current_lags = np.append(current_lags[1:], sigma_forecast[i])
        vol_forecast = np.sqrt(sigma_forecast)

        # Predict standardized residuals once and reuse the result for the
        # point forecast and for the simulations, so both stay consistent.
        preds_z = self.model_residuals.predict(h=h)
        z_forecast = preds_z.values.ravel() * self.z_std_ + self.z_mean_

        # Combine: mu + z * sigma
        point_forecast = mean_forecast + z_forecast * vol_forecast

        # Generate prediction intervals
        sims = None
        sims_z = getattr(preds_z, "sims", None) if return_sims else None
        if isinstance(sims_z, pd.DataFrame):
            # Rescale every simulated path at once: shape (h, n_sims)
            z_sims = sims_z.values * self.z_std_ + self.z_mean_
            sims = mean_forecast[:, None] + z_sims * vol_forecast[:, None]

            alpha = 1 - level / 100
            lower_bound = np.quantile(sims, alpha / 2, axis=1)
            upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
        else:
            # Gaussian intervals with proper scaling (also the fallback
            # when simulations were requested but are not available)
            z_score = norm.ppf(1 - (1 - level / 100) / 2)
            margin = z_score * vol_forecast * self.z_std_
            lower_bound = point_forecast - margin
            upper_bound = point_forecast + margin

        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
Parameters
model_mean : object Model for mean component model_sigma : object Model for volatility component (sklearn regressor) model_residuals : object Model for standardized residuals lags_vol : int, default=10 Number of lags for squared residuals in volatility model
def fit(self, y, **kwargs):
    """Fit the MLARCH model

    Parameters
    ----------
    y : array-like
        Target time series (should be stationary, e.g., returns).
        pandas objects, numpy arrays and plain sequences are accepted.
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If fewer than ``lags_vol + 20`` observations are supplied.
    """
    # Format input: np.asarray lets lists/tuples through as well
    if isinstance(y, (pd.Series, pd.DataFrame)):
        y = y.values
    y = np.asarray(y).ravel()

    if len(y) < self.lags_vol + 20:
        raise ValueError(f"Need at least {self.lags_vol + 20} observations")

    # Step 1: Fit mean model
    self.model_mean.fit(y.reshape(-1, 1))
    mean_residuals = np.asarray(self.model_mean.residuals_).ravel()

    # Step 2: Fit ARCH volatility model on lagged squared residuals.
    # The small constant keeps the log finite for zero residuals.
    resid_squared = mean_residuals**2
    X_vol = self._create_lags(resid_squared, self.lags_vol)
    y_vol = np.log(resid_squared[self.lags_vol :] + 1e-8)

    self.model_sigma.fit(X_vol, y_vol)

    # Fitted values are on the squared-residual (variance) scale
    fitted_sigma = np.exp(self.model_sigma.predict(X_vol))
    fitted_vol = np.sqrt(fitted_sigma)

    # Step 3: Compute standardized residuals
    standardized_residuals = mean_residuals[self.lags_vol :] / fitted_vol

    # Enforce zero mean and unit variance
    self.z_mean_ = np.mean(standardized_residuals)
    self.z_std_ = np.std(standardized_residuals)
    if self.z_std_ < np.finfo(float).eps:
        # Degenerate (constant) standardized residuals: dividing by zero
        # would turn everything downstream into NaN, so only center.
        self.z_std_ = 1.0
    standardized_residuals = (
        standardized_residuals - self.z_mean_
    ) / self.z_std_

    # Step 4: Fit residuals model
    self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

    # Store for prediction: seed of the recursive volatility forecast
    self.last_residuals_squared_ = resid_squared[-self.lags_vol :]

    # Store diagnostics
    self.fitted_volatility_mean_ = np.mean(fitted_vol)
    self.fitted_volatility_std_ = np.std(fitted_vol)

    return self
Fit the MLARCH model
Parameters
y : array-like Target time series (should be stationary, e.g., returns)
Returns
self
def predict(self, h=5, level=95, return_sims=False):
    """Predict future values

    Parameters
    ----------
    h : int
        Forecast horizon
    level : int
        Confidence level for prediction intervals
    return_sims : bool
        If True, return full simulation paths when the residuals model
        provides them (a ``.sims`` DataFrame on its prediction);
        otherwise Gaussian intervals are used and ``sims`` is None.

    Returns
    -------
    DescribeResult
        Named tuple with mean, sims, lower, upper
    """
    DescribeResult = namedtuple(
        "DescribeResult", ("mean", "sims", "lower", "upper")
    )

    # Get mean forecast
    mean_forecast = self.model_mean.predict(h=h).values.ravel()

    # Recursive ARCH volatility forecasting: each predicted variance is
    # fed back as the most recent lag for the next step.
    sigma_forecast = np.zeros(h)
    current_lags = self.last_residuals_squared_.copy()
    for i in range(h):
        log_sigma_t = self.model_sigma.predict(current_lags.reshape(1, -1))[0]
        sigma_forecast[i] = np.exp(log_sigma_t)
        current_lags = np.append(current_lags[1:], sigma_forecast[i])
    vol_forecast = np.sqrt(sigma_forecast)

    # Predict standardized residuals once, so that the point forecast and
    # the simulations come from the same call, then undo the scaling.
    preds_z = self.model_residuals.predict(h=h)
    z_forecast = preds_z.values.ravel() * self.z_std_ + self.z_mean_

    # Combine: mean + z * sigma
    point_forecast = mean_forecast + z_forecast * vol_forecast

    sims = None
    sims_z = getattr(preds_z, "sims", None) if return_sims else None
    if isinstance(sims_z, pd.DataFrame):
        # Rescale every simulated path at once: rows are horizons,
        # columns are simulations.
        z_sims = sims_z.values * self.z_std_ + self.z_mean_
        sims = (
            mean_forecast[:, np.newaxis]
            + z_sims * vol_forecast[:, np.newaxis]
        )
        alpha = 1 - level / 100
        lower_bound = np.quantile(sims, alpha / 2, axis=1)
        upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
    else:
        # Gaussian intervals (also the fallback when no usable
        # simulations are available)
        z_score = norm.ppf(1 - (1 - level / 100) / 2)
        margin = z_score * vol_forecast * self.z_std_
        lower_bound = point_forecast - margin
        upper_bound = point_forecast + margin

    return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Predict future values
Parameters
h : int Forecast horizon level : int Confidence level for prediction intervals return_sims : bool If True, return full simulation paths
Returns
DescribeResult Named tuple with mean, sims, lower, upper
class MedianVotingRegressor(VotingRegressor):
    """Voting regressor whose prediction is the element-wise median of the
    fitted base regressors' predictions."""

    def predict(self, X):
        """
        Predict using the median of the base regressors' predictions.

        Parameters:
            X (array-like): Feature matrix for predictions.

        Returns:
            y_pred (array): Median of predictions from the base regressors.
        """
        # One prediction vector per fitted estimator, stacked along axis 0
        per_estimator = []
        for fitted in self.estimators_:
            per_estimator.append(fitted.predict(X))
        stacked = np.asarray(per_estimator)
        # Collapse the estimator axis, leaving one value per sample
        return np.median(stacked, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
Read more in the :ref:`User Guide <voting_regressor>`.
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_. An estimator can be set to 'drop' using
set_params().
*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : ~sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during :term:`fit`. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during :term:`fit`. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr' estimator with
~VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
    """
    Predict using the median of the base regressors' predictions.

    Parameters:
        X (array-like): Feature matrix for predictions.

    Returns:
        y_pred (array): Median of predictions from the base regressors.
    """
    # One prediction vector per fitted estimator, stacked along axis 0
    per_estimator = []
    for fitted in self.estimators_:
        per_estimator.append(fitted.predict(X))
    stacked = np.asarray(per_estimator)
    # Collapse the estimator axis, leaving one value per sample
    return np.median(stacked, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
31class MTS(Base): 32 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 33 34 Parameters: 35 36 obj: object. 37 any object containing a method fit (obj.fit()) and a method predict 38 (obj.predict()). 39 40 n_hidden_features: int. 41 number of nodes in the hidden layer. 42 43 activation_name: str. 44 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 45 46 a: float. 47 hyperparameter for 'prelu' or 'elu' activation function. 48 49 nodes_sim: str. 50 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 51 'uniform'. 52 53 bias: boolean. 54 indicates if the hidden layer contains a bias term (True) or not 55 (False). 56 57 dropout: float. 58 regularization parameter; (random) percentage of nodes dropped out 59 of the training. 60 61 direct_link: boolean. 62 indicates if the original predictors are included (True) in model's fitting or not (False). 63 64 n_clusters: int. 65 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 66 67 cluster_encode: bool. 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding. 70 71 type_clust: str. 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm'). 74 75 type_scaling: a tuple of 3 strings. 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax'). 79 80 lags: int. 81 number of lags used for each time series. 82 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 83 84 type_pi: str. 
85 type of prediction interval; currently: 86 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 87 - "quantile": use model-agnostic quantile regression under the hood 88 - "kde": based on Kernel Density Estimation of in-sample residuals 89 - "bootstrap": based on independent bootstrap of in-sample residuals 90 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 91 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 92 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 93 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 94 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 95 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 96 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 97 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 98 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 99 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 100 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 101 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 102 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 103 104 level: int. 105 level of confidence for `type_pi == 'quantile'` (default is `95`) 106 107 block_size: int. 
108 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 109 Default is round(3.15*(n_residuals^1/3)) 110 111 replications: int. 112 number of replications (if needed, for predictive simulation). Default is 'None'. 113 114 kernel: str. 115 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 116 117 agg: str. 118 either "mean" or "median" for simulation of bootstrap aggregating 119 120 seed: int. 121 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 122 123 backend: str. 124 "cpu" or "gpu" or "tpu". 125 126 verbose: int. 127 0: not printing; 1: printing 128 129 show_progress: bool. 130 True: progress bar when fitting each series; False: no progress bar when fitting each series 131 132 Attributes: 133 134 fit_objs_: dict 135 objects adjusted to each individual time series 136 137 y_: {array-like} 138 MTS responses (most recent observations first) 139 140 X_: {array-like} 141 MTS lags 142 143 xreg_: {array-like} 144 external regressors 145 146 y_means_: dict 147 a dictionary of each series mean values 148 149 preds_: {array-like} 150 successive model predictions 151 152 preds_std_: {array-like} 153 standard deviation around the predictions for Bayesian base learners (`obj`) 154 155 gaussian_preds_std_: {array-like} 156 standard deviation around the predictions for `type_pi='gaussian'` 157 158 return_std_: boolean 159 return uncertainty or not (set in predict) 160 161 df_: data frame 162 the input data frame, in case a data.frame is provided to `fit` 163 164 n_obs_: int 165 number of time series observations (number of rows for multivariate) 166 167 level_: int 168 level of confidence for prediction intervals (default is 95) 169 170 residuals_: {array-like} 171 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 172 (for `type_pi` in conformal prediction) 173 174 residuals_sims_: tuple of 
{array-like} 175 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 176 calibrated residuals (for `type_pi` in conformal prediction) 177 178 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 179 180 residuals_std_dev_: residuals standard deviation 181 182 Examples: 183 184 Example 1: 185 186 ```python 187 import nnetsauce as ns 188 import numpy as np 189 from sklearn import linear_model 190 np.random.seed(123) 191 192 M = np.random.rand(10, 3) 193 M[:,0] = 10*M[:,0] 194 M[:,2] = 25*M[:,2] 195 print(M) 196 197 # Adjust Bayesian Ridge 198 regr4 = linear_model.BayesianRidge() 199 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 200 obj_MTS.fit(M) 201 print(obj_MTS.predict()) 202 203 # with credible intervals 204 print(obj_MTS.predict(return_std=True, level=80)) 205 206 print(obj_MTS.predict(return_std=True, level=95)) 207 ``` 208 209 Example 2: 210 211 ```python 212 import nnetsauce as ns 213 import numpy as np 214 from sklearn import linear_model 215 216 dataset = { 217 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 218 'series1' : [34, 30, 35.6, 33.3, 38.1], 219 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 220 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 221 df = pd.DataFrame(dataset).set_index('date') 222 print(df) 223 224 # Adjust Bayesian Ridge 225 regr5 = linear_model.BayesianRidge() 226 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 227 obj_MTS.fit(df) 228 print(obj_MTS.predict()) 229 230 # with credible intervals 231 print(obj_MTS.predict(return_std=True, level=80)) 232 233 print(obj_MTS.predict(return_std=True, level=95)) 234 ``` 235 """ 236 237 # construct the object ----- 238 239 def __init__( 240 self, 241 obj, 242 n_hidden_features=5, 243 activation_name="relu", 244 a=0.01, 245 nodes_sim="sobol", 246 bias=True, 247 dropout=0, 248 direct_link=True, 249 n_clusters=2, 250 cluster_encode=True, 251 type_clust="kmeans", 252 
type_scaling=("std", "std", "std"), 253 lags=1, 254 type_pi="kde", 255 level=95, 256 block_size=None, 257 replications=None, 258 kernel="gaussian", 259 agg="mean", 260 seed=123, 261 backend="cpu", 262 verbose=0, 263 show_progress=True, 264 ): 265 super().__init__( 266 n_hidden_features=n_hidden_features, 267 activation_name=activation_name, 268 a=a, 269 nodes_sim=nodes_sim, 270 bias=bias, 271 dropout=dropout, 272 direct_link=direct_link, 273 n_clusters=n_clusters, 274 cluster_encode=cluster_encode, 275 type_clust=type_clust, 276 type_scaling=type_scaling, 277 seed=seed, 278 backend=backend, 279 ) 280 281 # Add validation for lags parameter 282 if isinstance(lags, str): 283 assert lags in ( 284 "AIC", 285 "AICc", 286 "BIC", 287 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 288 else: 289 assert ( 290 int(lags) == lags 291 ), "if numeric, lags parameter should be an integer" 292 293 self.obj = obj 294 self.n_series = None 295 self.lags = lags 296 self.type_pi = type_pi 297 self.level = level 298 if self.type_pi == "quantile": 299 self.obj = QuantileRegressor( 300 self.obj, level=self.level, scoring="conformal" 301 ) 302 self.block_size = block_size 303 self.replications = replications 304 self.kernel = kernel 305 self.agg = agg 306 self.verbose = verbose 307 self.show_progress = show_progress 308 self.series_names = ["series0"] 309 self.input_dates = None 310 self.quantiles = None 311 self.fit_objs_ = {} 312 self.y_ = None # MTS responses (most recent observations first) 313 self.X_ = None # MTS lags 314 self.xreg_ = None 315 self.y_means_ = {} 316 self.mean_ = None 317 self.median_ = None 318 self.upper_ = None 319 self.lower_ = None 320 self.output_dates_ = None 321 self.preds_std_ = [] 322 self.gaussian_preds_std_ = None 323 self.alpha_ = None 324 self.return_std_ = None 325 self.df_ = None 326 self.residuals_ = [] 327 self.abs_calib_residuals_ = None 328 self.calib_residuals_quantile_ = None 329 self.residuals_sims_ = None 330 self.kde_ = None 331 
self.sims_ = None 332 self.residuals_std_dev_ = None 333 self.n_obs_ = None 334 self.level_ = None 335 self.init_n_series_ = None 336 337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 
371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 X_index = None 441 if X.index is not None: 442 X_index = X.index 443 if xreg is None: 444 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 445 else: 446 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 447 self.xreg_ = xreg 448 if 
X_index is not None: 449 X.index = X_index 450 self.series_names = X.columns.tolist() 451 452 if isinstance(X, pd.DataFrame): 453 if self.df_ is None: 454 self.df_ = X 455 X = X.values 456 else: 457 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 458 frequency = pd.infer_freq(input_dates_prev) 459 self.df_ = pd.concat([self.df_, X], axis=0) 460 self.input_dates = pd.date_range( 461 start=input_dates_prev[0], 462 periods=len(input_dates_prev) + X.shape[0], 463 freq=frequency, 464 ).values.tolist() 465 self.df_.index = self.input_dates 466 X = self.df_.values 467 self.df_.columns = self.series_names 468 else: 469 if self.df_ is None: 470 self.df_ = pd.DataFrame(X, columns=self.series_names) 471 else: 472 self.df_ = pd.concat( 473 [self.df_, pd.DataFrame(X, columns=self.series_names)], 474 axis=0, 475 ) 476 477 self.input_dates = ts.compute_input_dates(self.df_) 478 479 try: 480 # multivariate time series 481 n, p = X.shape 482 except: 483 # univariate time series 484 n = X.shape[0] 485 p = 1 486 self.n_obs_ = n 487 488 rep_1_n = np.repeat(1, n) 489 490 self.y_ = None 491 self.X_ = None 492 self.n_series = p 493 self.fit_objs_.clear() 494 self.y_means_.clear() 495 residuals_ = [] 496 self.residuals_ = None 497 self.residuals_sims_ = None 498 self.kde_ = None 499 self.sims_ = None 500 self.scaled_Z_ = None 501 self.centered_y_is_ = [] 502 503 if self.init_n_series_ > 1: 504 # multivariate time series 505 mts_input = ts.create_train_inputs(X[::-1], self.lags) 506 else: 507 # univariate time series 508 mts_input = ts.create_train_inputs( 509 X.reshape(-1, 1)[::-1], self.lags 510 ) 511 512 self.y_ = mts_input[0] 513 514 self.X_ = mts_input[1] 515 516 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 517 518 self.scaled_Z_ = scaled_Z 519 520 # loop on all the time series and adjust self.obj.fit 521 if self.verbose > 0: 522 print( 523 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... 
\n" 524 ) 525 526 if self.show_progress is True: 527 iterator = tqdm(range(self.init_n_series_)) 528 else: 529 iterator = range(self.init_n_series_) 530 531 if self.type_pi in ( 532 "gaussian", 533 "kde", 534 "bootstrap", 535 "block-bootstrap", 536 ) or self.type_pi.startswith("vine"): 537 for i in iterator: 538 y_mean = np.mean(self.y_[:, i]) 539 self.y_means_[i] = y_mean 540 centered_y_i = self.y_[:, i] - y_mean 541 self.centered_y_is_.append(centered_y_i) 542 self.obj.fit(X=scaled_Z, y=centered_y_i) 543 self.fit_objs_[i] = deepcopy(self.obj) 544 residuals_.append( 545 ( 546 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 547 ).tolist() 548 ) 549 550 if self.type_pi == "quantile": 551 for i in iterator: 552 y_mean = np.mean(self.y_[:, i]) 553 self.y_means_[i] = y_mean 554 centered_y_i = self.y_[:, i] - y_mean 555 self.centered_y_is_.append(centered_y_i) 556 self.obj.fit(X=scaled_Z, y=centered_y_i) 557 self.fit_objs_[i] = deepcopy(self.obj) 558 559 if self.type_pi.startswith("scp"): 560 # split conformal prediction 561 for i in iterator: 562 n_y = self.y_.shape[0] 563 n_y_half = n_y // 2 564 first_half_idx = range(0, n_y_half) 565 second_half_idx = range(n_y_half, n_y) 566 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 567 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 568 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 569 # calibrated residuals actually 570 residuals_.append( 571 ( 572 self.y_[second_half_idx, i] 573 - ( 574 y_mean_temp 575 + self.obj.predict(scaled_Z[second_half_idx, :]) 576 ) 577 ).tolist() 578 ) 579 # fit on the second half 580 y_mean = np.mean(self.y_[second_half_idx, i]) 581 self.y_means_[i] = y_mean 582 centered_y_i = self.y_[second_half_idx, i] - y_mean 583 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 584 self.fit_objs_[i] = deepcopy(self.obj) 585 586 self.residuals_ = np.asarray(residuals_).T 587 588 if self.type_pi == "gaussian": 589 self.gaussian_preds_std_ = np.std(self.residuals_, 
axis=0) 590 591 if self.type_pi.startswith("scp2"): 592 # Calculate mean and standard deviation for each column 593 data_mean = np.mean(self.residuals_, axis=0) 594 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 595 # Center and scale the array using broadcasting 596 self.residuals_ = ( 597 self.residuals_ - data_mean[np.newaxis, :] 598 ) / self.residuals_std_dev_[np.newaxis, :] 599 600 if self.replications != None and "kde" in self.type_pi: 601 if self.verbose > 0: 602 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 603 assert self.kernel in ( 604 "gaussian", 605 "tophat", 606 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 607 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 608 grid = GridSearchCV( 609 KernelDensity(kernel=self.kernel, **kwargs), 610 param_grid=kernel_bandwidths, 611 ) 612 grid.fit(self.residuals_) 613 614 if self.verbose > 0: 615 print( 616 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 617 ) 618 619 self.kde_ = grid.best_estimator_ 620 621 return self 622 623 def partial_fit(self, X, xreg=None, **kwargs): 624 """partial_fit MTS model to training data X, with optional regressors xreg 625 626 Parameters: 627 628 X: {array-like}, shape = [n_samples, n_features] 629 Training time series, where n_samples is the number 630 of samples and n_features is the number of features; 631 X must be in increasing order (most recent observations last) 632 633 xreg: {array-like}, shape = [n_samples, n_features_xreg] 634 Additional (external) regressors to be passed to self.obj 635 xreg must be in 'increasing' order (most recent observations last) 636 637 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 638 639 Returns: 640 641 self: object 642 """ 643 try: 644 self.init_n_series_ = X.shape[1] 645 except IndexError as e: 646 self.init_n_series_ = 1 647 648 # Automatic lag selection if requested 649 
if isinstance(self.lags, str): 650 max_lags = min(25, X.shape[0] // 4) 651 best_ic = float("inf") 652 best_lags = 1 653 654 if self.verbose: 655 print( 656 f"\nSelecting optimal number of lags using {self.lags}..." 657 ) 658 iterator = tqdm(range(1, max_lags + 1)) 659 else: 660 iterator = range(1, max_lags + 1) 661 662 for lag in iterator: 663 # Convert DataFrame to numpy array before reversing 664 if isinstance(X, pd.DataFrame): 665 X_values = X.values[::-1] 666 else: 667 X_values = X[::-1] 668 669 # Try current lag value 670 if self.init_n_series_ > 1: 671 mts_input = ts.create_train_inputs(X_values, lag) 672 else: 673 mts_input = ts.create_train_inputs( 674 X_values.reshape(-1, 1), lag 675 ) 676 677 # Cook training set and partial_fit model 678 dummy_y, scaled_Z = self.cook_training_set( 679 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 680 ) 681 residuals_ = [] 682 683 for i in range(self.init_n_series_): 684 y_mean = np.mean(mts_input[0][:, i]) 685 centered_y_i = mts_input[0][:, i] - y_mean 686 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 687 residuals_.append( 688 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 689 ) 690 691 self.residuals_ = np.asarray(residuals_).T 692 ic = self._compute_information_criterion( 693 curr_lags=lag, criterion=self.lags 694 ) 695 696 if self.verbose: 697 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 698 699 if ic < best_ic: 700 best_ic = ic 701 best_lags = lag 702 703 if self.verbose: 704 print( 705 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 706 ) 707 708 self.lags = best_lags 709 710 self.input_dates = None 711 self.df_ = None 712 713 if isinstance(X, pd.DataFrame) is False: 714 # input data set is a numpy array 715 if xreg is None: 716 X = pd.DataFrame(X) 717 if len(X.shape) > 1: 718 self.series_names = [ 719 "series" + str(i) for i in range(X.shape[1]) 720 ] 721 else: 722 self.series_names = ["series0"] 723 else: 724 # xreg is not None 725 X = mo.cbind(X, xreg) 726 self.xreg_ = xreg 727 728 
else: # input data set is a DataFrame with column names 729 X_index = None 730 if X.index is not None: 731 X_index = X.index 732 if xreg is None: 733 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 734 else: 735 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 736 self.xreg_ = xreg 737 if X_index is not None: 738 X.index = X_index 739 self.series_names = X.columns.tolist() 740 741 if isinstance(X, pd.DataFrame): 742 if self.df_ is None: 743 self.df_ = X 744 X = X.values 745 else: 746 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 747 frequency = pd.infer_freq(input_dates_prev) 748 self.df_ = pd.concat([self.df_, X], axis=0) 749 self.input_dates = pd.date_range( 750 start=input_dates_prev[0], 751 periods=len(input_dates_prev) + X.shape[0], 752 freq=frequency, 753 ).values.tolist() 754 self.df_.index = self.input_dates 755 X = self.df_.values 756 self.df_.columns = self.series_names 757 else: 758 if self.df_ is None: 759 self.df_ = pd.DataFrame(X, columns=self.series_names) 760 else: 761 self.df_ = pd.concat( 762 [self.df_, pd.DataFrame(X, columns=self.series_names)], 763 axis=0, 764 ) 765 766 self.input_dates = ts.compute_input_dates(self.df_) 767 768 try: 769 # multivariate time series 770 n, p = X.shape 771 except: 772 # univariate time series 773 n = X.shape[0] 774 p = 1 775 self.n_obs_ = n 776 777 rep_1_n = np.repeat(1, n) 778 779 self.y_ = None 780 self.X_ = None 781 self.n_series = p 782 self.fit_objs_.clear() 783 self.y_means_.clear() 784 residuals_ = [] 785 self.residuals_ = None 786 self.residuals_sims_ = None 787 self.kde_ = None 788 self.sims_ = None 789 self.scaled_Z_ = None 790 self.centered_y_is_ = [] 791 792 if self.init_n_series_ > 1: 793 # multivariate time series 794 mts_input = ts.create_train_inputs(X[::-1], self.lags) 795 else: 796 # univariate time series 797 mts_input = ts.create_train_inputs( 798 X.reshape(-1, 1)[::-1], self.lags 799 ) 800 801 self.y_ = mts_input[0] 802 803 self.X_ = mts_input[1] 804 805 dummy_y, 
scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 806 807 self.scaled_Z_ = scaled_Z 808 809 # loop on all the time series and adjust self.obj.partial_fit 810 if self.verbose > 0: 811 print( 812 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 813 ) 814 815 if self.show_progress is True: 816 iterator = tqdm(range(self.init_n_series_)) 817 else: 818 iterator = range(self.init_n_series_) 819 820 if self.type_pi in ( 821 "gaussian", 822 "kde", 823 "bootstrap", 824 "block-bootstrap", 825 ) or self.type_pi.startswith("vine"): 826 for i in iterator: 827 y_mean = np.mean(self.y_[:, i]) 828 self.y_means_[i] = y_mean 829 centered_y_i = self.y_[:, i] - y_mean 830 self.centered_y_is_.append(centered_y_i) 831 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 832 self.fit_objs_[i] = deepcopy(self.obj) 833 residuals_.append( 834 ( 835 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 836 ).tolist() 837 ) 838 839 if self.type_pi == "quantile": 840 for i in iterator: 841 y_mean = np.mean(self.y_[:, i]) 842 self.y_means_[i] = y_mean 843 centered_y_i = self.y_[:, i] - y_mean 844 self.centered_y_is_.append(centered_y_i) 845 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 846 self.fit_objs_[i] = deepcopy(self.obj) 847 848 if self.type_pi.startswith("scp"): 849 # split conformal prediction 850 for i in iterator: 851 n_y = self.y_.shape[0] 852 n_y_half = n_y // 2 853 first_half_idx = range(0, n_y_half) 854 second_half_idx = range(n_y_half, n_y) 855 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 856 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 857 self.obj.partial_fit( 858 X=scaled_Z[first_half_idx, :], y=centered_y_i_temp 859 ) 860 # calibrated residuals actually 861 residuals_.append( 862 ( 863 self.y_[second_half_idx, i] 864 - ( 865 y_mean_temp 866 + self.obj.predict(scaled_Z[second_half_idx, :]) 867 ) 868 ).tolist() 869 ) 870 # partial_fit on the second half 871 y_mean = np.mean(self.y_[second_half_idx, i]) 872 self.y_means_[i] = 
def _predict_quantiles(self, h, quantiles, **kwargs):
    """Predict arbitrary quantiles from simulated paths.

    Parameters
    ----------
    h : int
        Forecasting horizon (number of steps ahead).
    quantiles : float or array_like of float
        Quantile levels to extract from the simulated paths.
    **kwargs : dict
        Forwarded to `predict` when simulations must be (re)generated.

    Returns
    -------
    pandas.DataFrame
        One column per (quantile, series) pair, named
        ``quantile_<label>_<series name>`` and indexed by `output_dates_`.

    Raises
    ------
    ValueError
        If `predict` leaves `sims_` empty, so there are no simulated
        paths to take quantiles from.
    """
    # Ensure output dates are set
    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

    # Accept a scalar as well as a sequence of quantile levels.
    quantiles = np.atleast_1d(np.asarray(quantiles, dtype=float))

    # Reuse cached simulations only when they match the requested horizon;
    # paths left over from a previous `predict` call with a different `h`
    # would not line up with `output_dates_`.
    sims = getattr(self, "sims_", None)
    if sims is None or len(sims) == 0 or len(sims[0]) != h:
        self.predict(h=h, level=95, **kwargs)  # any level triggers the sims
        sims = self.sims_

    if sims is None:
        raise ValueError(
            "no simulated paths available: `predict` did not populate "
            "`sims_`, so quantiles cannot be computed"
        )

    # Stack simulations: (R, h, n_series)
    sims_array = np.stack([sim.values for sim in sims], axis=0)

    # Quantiles over the replication axis: (n_q, h, n_series)
    q_values = np.quantile(sims_array, quantiles, axis=0)

    result_dict = {}
    for q_idx, q in enumerate(quantiles):
        # Label: 0.05 -> "05", 0.1 -> "10", 0.95 -> "95"; levels that are
        # not a whole percentage fall back to e.g. "0_125".
        q_label = (
            f"{int(q * 100):02d}"
            if (q * 100).is_integer()
            else f"{q:.3f}".replace(".", "_")
        )
        for series_id in range(self.init_n_series_):
            series_name = self.series_names[series_id]
            result_dict[f"quantile_{q_label}_{series_name}"] = q_values[
                q_idx, :, series_id
            ]

    return pd.DataFrame(result_dict, index=self.output_dates_)
self.n_series 991 ): # Assumes 'lower' and 'upper' have multiple series 992 result_dict[f"lower_{lev}_{series_id}"] = ( 993 res.lower[series_id, :].to_numpy().flatten() 994 ) 995 result_dict[f"upper_{lev}_{series_id}"] = ( 996 res.upper[series_id, :].to_numpy().flatten() 997 ) 998 return pd.DataFrame(result_dict, index=self.output_dates_) 999 1000 # only one prediction interval 1001 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 1002 1003 self.level_ = level 1004 1005 self.return_std_ = False # do not remove (/!\) 1006 1007 self.mean_ = None # do not remove (/!\) 1008 1009 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 1010 1011 self.lower_ = None # do not remove (/!\) 1012 1013 self.upper_ = None # do not remove (/!\) 1014 1015 self.sims_ = None # do not remove (/!\) 1016 1017 y_means_ = np.asarray( 1018 [self.y_means_[i] for i in range(self.init_n_series_)] 1019 ) 1020 1021 n_features = self.init_n_series_ * self.lags 1022 1023 self.alpha_ = 100 - level 1024 1025 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 1026 1027 if "return_std" in kwargs: # bayesian forecasting 1028 self.return_std_ = True 1029 self.preds_std_ = [] 1030 DescribeResult = namedtuple( 1031 "DescribeResult", ("mean", "lower", "upper") 1032 ) # to be updated 1033 1034 if "return_pi" in kwargs: # split conformal, without simulation 1035 mean_pi_ = [] 1036 lower_pi_ = [] 1037 upper_pi_ = [] 1038 median_pi_ = [] 1039 DescribeResult = namedtuple( 1040 "DescribeResult", ("mean", "lower", "upper") 1041 ) # to be updated 1042 1043 if self.kde_ != None and "kde" in self.type_pi: # kde 1044 target_cols = self.df_.columns[ 1045 : self.init_n_series_ 1046 ] # Get target column names 1047 if self.verbose == 1: 1048 self.residuals_sims_ = tuple( 1049 self.kde_.sample( 1050 n_samples=h, random_state=self.seed + 100 * i 1051 ) # Keep full sample 1052 for i in tqdm(range(self.replications)) 1053 ) 1054 elif self.verbose == 0: 1055 self.residuals_sims_ = tuple( 1056 
self.kde_.sample( 1057 n_samples=h, random_state=self.seed + 100 * i 1058 ) # Keep full sample 1059 for i in range(self.replications) 1060 ) 1061 1062 # Convert to DataFrames after sampling 1063 self.residuals_sims_ = tuple( 1064 pd.DataFrame( 1065 sim, # Keep all columns 1066 columns=target_cols, # Use original target column names 1067 index=self.output_dates_, 1068 ) 1069 for sim in self.residuals_sims_ 1070 ) 1071 1072 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 1073 assert self.replications is not None and isinstance( 1074 self.replications, int 1075 ), "'replications' must be provided and be an integer" 1076 if self.verbose == 1: 1077 self.residuals_sims_ = tuple( 1078 ts.bootstrap( 1079 self.residuals_, 1080 h=h, 1081 block_size=None, 1082 seed=self.seed + 100 * i, 1083 ) 1084 for i in tqdm(range(self.replications)) 1085 ) 1086 elif self.verbose == 0: 1087 self.residuals_sims_ = tuple( 1088 ts.bootstrap( 1089 self.residuals_, 1090 h=h, 1091 block_size=None, 1092 seed=self.seed + 100 * i, 1093 ) 1094 for i in range(self.replications) 1095 ) 1096 1097 if self.type_pi in ( 1098 "block-bootstrap", 1099 "scp-block-bootstrap", 1100 "scp2-block-bootstrap", 1101 ): 1102 if self.block_size is None: 1103 self.block_size = int( 1104 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 1105 ) 1106 1107 assert self.replications is not None and isinstance( 1108 self.replications, int 1109 ), "'replications' must be provided and be an integer" 1110 if self.verbose == 1: 1111 self.residuals_sims_ = tuple( 1112 ts.bootstrap( 1113 self.residuals_, 1114 h=h, 1115 block_size=self.block_size, 1116 seed=self.seed + 100 * i, 1117 ) 1118 for i in tqdm(range(self.replications)) 1119 ) 1120 elif self.verbose == 0: 1121 self.residuals_sims_ = tuple( 1122 ts.bootstrap( 1123 self.residuals_, 1124 h=h, 1125 block_size=self.block_size, 1126 seed=self.seed + 100 * i, 1127 ) 1128 for i in range(self.replications) 1129 ) 1130 1131 if "vine" in self.type_pi: 1132 if 
self.verbose == 1: 1133 self.residuals_sims_ = tuple( 1134 vinecopula_sample( 1135 x=self.residuals_, 1136 n_samples=h, 1137 method=self.type_pi, 1138 random_state=self.seed + 100 * i, 1139 ) 1140 for i in tqdm(range(self.replications)) 1141 ) 1142 elif self.verbose == 0: 1143 self.residuals_sims_ = tuple( 1144 vinecopula_sample( 1145 x=self.residuals_, 1146 n_samples=h, 1147 method=self.type_pi, 1148 random_state=self.seed + 100 * i, 1149 ) 1150 for i in range(self.replications) 1151 ) 1152 1153 mean_ = deepcopy(self.mean_) 1154 1155 for i in range(h): 1156 new_obs = ts.reformat_response(mean_, self.lags) 1157 new_X = new_obs.reshape(1, -1) 1158 cooked_new_X = self.cook_test_set(new_X, **kwargs) 1159 1160 if "return_std" in kwargs: 1161 self.preds_std_.append( 1162 [ 1163 np.asarray( 1164 self.fit_objs_[i].predict( 1165 cooked_new_X, return_std=True 1166 )[1] 1167 ).item() 1168 for i in range(self.n_series) 1169 ] 1170 ) 1171 1172 if "return_pi" in kwargs: 1173 for i in range(self.n_series): 1174 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 1175 mean_pi_.append(preds_pi.mean[0]) 1176 lower_pi_.append(preds_pi.lower[0]) 1177 upper_pi_.append(preds_pi.upper[0]) 1178 1179 if self.type_pi != "quantile": 1180 predicted_cooked_new_X = np.asarray( 1181 [ 1182 np.asarray( 1183 self.fit_objs_[i].predict(cooked_new_X) 1184 ).item() 1185 for i in range(self.init_n_series_) 1186 ] 1187 ) 1188 else: 1189 predicted_cooked_new_X = np.asarray( 1190 [ 1191 np.asarray( 1192 self.fit_objs_[i] 1193 .predict(cooked_new_X, return_pi=True) 1194 .upper 1195 ).item() 1196 for i in range(self.init_n_series_) 1197 ] 1198 ) 1199 1200 preds = np.asarray(y_means_ + predicted_cooked_new_X) 1201 1202 # Create full row with both predictions and external regressors 1203 if self.xreg_ is not None and "xreg" in kwargs: 1204 next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten() 1205 full_row = np.concatenate([preds, next_xreg]) 1206 else: 1207 full_row = preds 1208 1209 # Create 
a new row with same number of columns as mean_ 1210 new_row = np.zeros((1, mean_.shape[1])) 1211 new_row[0, : full_row.shape[0]] = full_row 1212 1213 # Maintain the full dimensionality by using vstack instead of rbind 1214 mean_ = np.vstack([new_row, mean_[:-1]]) 1215 1216 # Final output should only include the target columns 1217 print("mean_", mean_) 1218 print("h", h) 1219 print("self.init_n_series_", self.init_n_series_) 1220 self.mean_ = pd.DataFrame( 1221 mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][ 1222 ::-1 1223 ], 1224 columns=self.df_.columns[: self.init_n_series_], 1225 index=self.output_dates_, 1226 ) 1227 1228 # function's return ---------------------------------------------------------------------- 1229 if ( 1230 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 1231 and (self.type_pi not in ("gaussian", "scp")) 1232 ) or ("vine" in self.type_pi): 1233 if self.replications is None: 1234 return self.mean_.iloc[:, : self.init_n_series_] 1235 1236 # if "return_std" not in kwargs and self.replications is not None 1237 meanf = [] 1238 medianf = [] 1239 lower = [] 1240 upper = [] 1241 1242 if "scp2" in self.type_pi: 1243 if self.verbose == 1: 1244 self.sims_ = tuple( 1245 ( 1246 self.mean_ 1247 + self.residuals_sims_[i] 1248 * self.residuals_std_dev_[np.newaxis, :] 1249 for i in tqdm(range(self.replications)) 1250 ) 1251 ) 1252 elif self.verbose == 0: 1253 self.sims_ = tuple( 1254 ( 1255 self.mean_ 1256 + self.residuals_sims_[i] 1257 * self.residuals_std_dev_[np.newaxis, :] 1258 for i in range(self.replications) 1259 ) 1260 ) 1261 else: 1262 if self.verbose == 1: 1263 self.sims_ = tuple( 1264 ( 1265 self.mean_ + self.residuals_sims_[i] 1266 for i in tqdm(range(self.replications)) 1267 ) 1268 ) 1269 elif self.verbose == 0: 1270 self.sims_ = tuple( 1271 ( 1272 self.mean_ + self.residuals_sims_[i] 1273 for i in range(self.replications) 1274 ) 1275 ) 1276 1277 DescribeResult = namedtuple( 1278 "DescribeResult", ("mean", "sims", 
"lower", "upper") 1279 ) 1280 for ix in range(self.init_n_series_): 1281 sims_ix = getsims(self.sims_, ix) 1282 if self.agg == "mean": 1283 meanf.append(np.mean(sims_ix, axis=1)) 1284 else: 1285 medianf.append(np.median(sims_ix, axis=1)) 1286 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 1287 upper.append( 1288 np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1) 1289 ) 1290 print("meanf", meanf) 1291 print("self.series_names", self.series_names) 1292 self.mean_ = pd.DataFrame( 1293 np.asarray(meanf).T, 1294 columns=self.series_names[ 1295 : self.init_n_series_ 1296 ], # self.df_.columns, 1297 index=self.output_dates_, 1298 ) 1299 1300 self.lower_ = pd.DataFrame( 1301 np.asarray(lower).T, 1302 columns=self.series_names[ 1303 : self.init_n_series_ 1304 ], # self.df_.columns, 1305 index=self.output_dates_, 1306 ) 1307 1308 self.upper_ = pd.DataFrame( 1309 np.asarray(upper).T, 1310 columns=self.series_names[ 1311 : self.init_n_series_ 1312 ], # self.df_.columns, 1313 index=self.output_dates_, 1314 ) 1315 1316 try: 1317 self.median_ = pd.DataFrame( 1318 np.asarray(medianf).T, 1319 columns=self.series_names[ 1320 : self.init_n_series_ 1321 ], # self.df_.columns, 1322 index=self.output_dates_, 1323 ) 1324 except Exception as e: 1325 pass 1326 1327 return DescribeResult( 1328 self.mean_, self.sims_, self.lower_, self.upper_ 1329 ) 1330 1331 if ( 1332 (("return_std" in kwargs) or ("return_pi" in kwargs)) 1333 and (self.type_pi not in ("gaussian", "scp")) 1334 ) or "vine" in self.type_pi: 1335 DescribeResult = namedtuple( 1336 "DescribeResult", ("mean", "lower", "upper") 1337 ) 1338 1339 self.mean_ = pd.DataFrame( 1340 np.asarray(self.mean_), 1341 columns=self.series_names, # self.df_.columns, 1342 index=self.output_dates_, 1343 ) 1344 1345 if "return_std" in kwargs: 1346 self.preds_std_ = np.asarray(self.preds_std_) 1347 1348 self.lower_ = pd.DataFrame( 1349 self.mean_.values - pi_multiplier * self.preds_std_, 1350 columns=self.series_names, # 
self.df_.columns, 1351 index=self.output_dates_, 1352 ) 1353 1354 self.upper_ = pd.DataFrame( 1355 self.mean_.values + pi_multiplier * self.preds_std_, 1356 columns=self.series_names, # self.df_.columns, 1357 index=self.output_dates_, 1358 ) 1359 1360 if "return_pi" in kwargs: 1361 self.lower_ = pd.DataFrame( 1362 np.asarray(lower_pi_).reshape(h, self.n_series) 1363 + y_means_[np.newaxis, :], 1364 columns=self.series_names, # self.df_.columns, 1365 index=self.output_dates_, 1366 ) 1367 1368 self.upper_ = pd.DataFrame( 1369 np.asarray(upper_pi_).reshape(h, self.n_series) 1370 + y_means_[np.newaxis, :], 1371 columns=self.series_names, # self.df_.columns, 1372 index=self.output_dates_, 1373 ) 1374 1375 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1376 1377 print("\n res", res) 1378 1379 if self.xreg_ is not None: 1380 if len(self.xreg_.shape) > 1: 1381 res2 = mx.tuple_map( 1382 res, 1383 lambda x: mo.delete_last_columns( 1384 x, num_columns=self.xreg_.shape[1] 1385 ), 1386 ) 1387 else: 1388 res2 = mx.tuple_map( 1389 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1390 ) 1391 return DescribeResult(res2[0], res2[1], res2[2]) 1392 1393 return res 1394 1395 if self.type_pi == "gaussian": 1396 DescribeResult = namedtuple( 1397 "DescribeResult", ("mean", "lower", "upper") 1398 ) 1399 1400 self.mean_ = pd.DataFrame( 1401 np.asarray(self.mean_), 1402 columns=self.series_names, # self.df_.columns, 1403 index=self.output_dates_, 1404 ) 1405 1406 # Use Bayesian std if available, otherwise use gaussian residual std 1407 if "return_std" in kwargs and len(self.preds_std_) > 0: 1408 preds_std_to_use = np.asarray(self.preds_std_) 1409 else: 1410 preds_std_to_use = self.gaussian_preds_std_ 1411 1412 self.lower_ = pd.DataFrame( 1413 self.mean_.values - pi_multiplier * preds_std_to_use, 1414 columns=self.series_names, # self.df_.columns, 1415 index=self.output_dates_, 1416 ) 1417 1418 self.upper_ = pd.DataFrame( 1419 self.mean_.values + pi_multiplier * 
preds_std_to_use, 1420 columns=self.series_names, # self.df_.columns, 1421 index=self.output_dates_, 1422 ) 1423 1424 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1425 1426 print("\n res", res) 1427 1428 if self.xreg_ is not None: 1429 if len(self.xreg_.shape) > 1: 1430 res2 = mx.tuple_map( 1431 res, 1432 lambda x: mo.delete_last_columns( 1433 x, num_columns=self.xreg_.shape[1] 1434 ), 1435 ) 1436 else: 1437 res2 = mx.tuple_map( 1438 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1439 ) 1440 return DescribeResult(res2[0], res2[1], res2[2]) 1441 1442 return res 1443 1444 if self.type_pi == "quantile": 1445 DescribeResult = namedtuple("DescribeResult", ("mean")) 1446 1447 self.mean_ = pd.DataFrame( 1448 np.asarray(self.mean_), 1449 columns=self.series_names, # self.df_.columns, 1450 index=self.output_dates_, 1451 ) 1452 1453 res = DescribeResult(self.mean_) 1454 1455 print("\n res", res) 1456 1457 if self.xreg_ is not None: 1458 if len(self.xreg_.shape) > 1: 1459 res2 = mx.tuple_map( 1460 res, 1461 lambda x: mo.delete_last_columns( 1462 x, num_columns=self.xreg_.shape[1] 1463 ), 1464 ) 1465 else: 1466 res2 = mx.tuple_map( 1467 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1468 ) 1469 return DescribeResult(res2[0]) 1470 1471 return res 1472 1473 # After prediction loop, ensure sims only contain target columns 1474 if self.sims_ is not None: 1475 if self.verbose == 1: 1476 self.sims_ = tuple( 1477 sim[:h,] # Only keep target columns and h rows 1478 for sim in tqdm(self.sims_) 1479 ) 1480 elif self.verbose == 0: 1481 self.sims_ = tuple( 1482 sim[:h,] # Only keep target columns and h rows 1483 for sim in self.sims_ 1484 ) 1485 1486 # Convert numpy arrays to DataFrames with proper columns 1487 self.sims_ = tuple( 1488 pd.DataFrame( 1489 sim, 1490 columns=self.df_.columns[: self.init_n_series_], 1491 index=self.output_dates_, 1492 ) 1493 for sim in self.sims_ 1494 ) 1495 1496 if self.type_pi in ( 1497 "kde", 1498 "bootstrap", 1499 
"block-bootstrap", 1500 "vine-copula", 1501 ): 1502 if self.xreg_ is not None: 1503 # Use getsimsxreg when external regressors are present 1504 target_cols = self.df_.columns[: self.init_n_series_] 1505 self.sims_ = getsimsxreg( 1506 self.sims_, self.output_dates_, target_cols 1507 ) 1508 else: 1509 # Use original getsims for backward compatibility 1510 self.sims_ = getsims(self.sims_) 1511 1512 def _crps_ensemble(self, y_true, simulations, axis=0): 1513 """ 1514 Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations. 1515 1516 The CRPS is a measure of the distance between the cumulative distribution 1517 function (CDF) of a forecast and the CDF of the observed value. This method 1518 computes the CRPS in a vectorized form for an ensemble of simulations, efficiently 1519 handling the case where there is only one simulation. 1520 1521 Parameters 1522 ---------- 1523 y_true : array_like, shape (n,) 1524 A 1D array of true values (observations). 1525 Each element represents the true value for a given sample. 1526 1527 simulations : array_like, shape (n, R) 1528 A 2D array of simulated values. Each row corresponds to a different sample 1529 and each column corresponds to a different simulation of that sample. 1530 1531 axis : int, optional, default=0 1532 Axis along which to transpose the simulations if needed. 1533 If axis=0, the simulations are transposed to shape (R, n). 1534 1535 Returns 1536 ------- 1537 crps : ndarray, shape (n,) 1538 A 1D array of CRPS scores, one for each sample. 1539 1540 Notes 1541 ----- 1542 The CRPS score is computed as: 1543 1544 CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|] 1545 1546 Where: 1547 - `X` is the ensemble of simulations. 1548 - `y` is the true value. 1549 - `X'` is a second independent sample from the ensemble. 1550 1551 The calculation is vectorized to optimize performance for large datasets. 
1552 1553 The edge case where `R=1` (only one simulation) is handled by returning 1554 only `term1` (i.e., no ensemble spread). 1555 """ 1556 sims = np.asarray(simulations) # Convert simulations to numpy array 1557 if axis == 0: 1558 sims = sims.T # Transpose if the axis is 0 1559 n, R = sims.shape # n = number of samples, R = number of simulations 1560 # Term 1: E|X - y|, average absolute difference between simulations and true value 1561 term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1) 1562 # Handle edge case: if R == 1, return term1 (no spread in ensemble) 1563 if R == 1: 1564 return term1 1565 # Term 2: 0.5 * E|X - X'|, using efficient sorted formula 1566 sims_sorted = np.sort(sims, axis=1) # Sort simulations along each row 1567 # Correct coefficients for efficient calculation 1568 j = np.arange(R) # 0-indexed positions in the sorted simulations 1569 coefficients = (2 * (j + 1) - R - 1) / ( 1570 R * (R - 1) 1571 ) # Efficient coefficient calculation 1572 # Dot product along the second axis (over the simulations) 1573 term2 = np.dot(sims_sorted, coefficients) 1574 # Return CRPS score: term1 - 0.5 * term2 1575 return term1 - 0.5 * term2 1576 1577 def score( 1578 self, 1579 X, 1580 training_index, 1581 testing_index, 1582 scoring=None, 1583 alpha=0.5, 1584 **kwargs, 1585 ): 1586 """Train on training_index, score on testing_index.""" 1587 1588 assert ( 1589 bool(set(training_index).intersection(set(testing_index))) == False 1590 ), "Non-overlapping 'training_index' and 'testing_index' required" 1591 1592 # Dimensions 1593 try: 1594 # multivariate time series 1595 n, p = X.shape 1596 except: 1597 # univariate time series 1598 n = X.shape[0] 1599 p = 1 1600 1601 # Training and testing sets 1602 if p > 1: 1603 X_train = X[training_index, :] 1604 X_test = X[testing_index, :] 1605 else: 1606 X_train = X[training_index] 1607 X_test = X[testing_index] 1608 1609 # Horizon 1610 h = len(testing_index) 1611 assert ( 1612 len(training_index) + h 1613 ) <= n, 
"Please check lengths of training and testing windows" 1614 1615 # Fit and predict 1616 self.fit(X_train, **kwargs) 1617 preds = self.predict(h=h, **kwargs) 1618 1619 if scoring is None: 1620 scoring = "neg_root_mean_squared_error" 1621 1622 if scoring == "pinball": 1623 # Predict requested quantile 1624 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1625 # Handle multivariate 1626 scores = [] 1627 for j in range(p): 1628 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1629 q_label = ( 1630 f"{int(alpha * 100):02d}" 1631 if (alpha * 100).is_integer() 1632 else f"{alpha:.3f}".replace(".", "_") 1633 ) 1634 col = f"quantile_{q_label}_{series_name}" 1635 if col not in q_pred.columns: 1636 raise ValueError( 1637 f"Column '{col}' not found in quantile forecast output." 1638 ) 1639 y_true_j = X_test[:, j] 1640 y_pred_j = q_pred[col].values 1641 # Compute pinball loss for this series 1642 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1643 scores.append(loss) 1644 # Return average over series 1645 return np.mean(scores) 1646 1647 if scoring == "crps": 1648 # Ensure simulations exist 1649 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1650 # Extract simulations: list of DataFrames → (R, h, p) 1651 sims_vals = np.stack( 1652 [sim.values for sim in self.sims_], axis=0 1653 ) # (R, h, p) 1654 crps_scores = [] 1655 for j in range(p): 1656 y_true_j = X_test[:, j] 1657 sims_j = sims_vals[:, :, j] # (R, h) 1658 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1659 crps_scores.append(np.mean(crps_j)) # average over horizon 1660 return np.mean(crps_scores) # average over series 1661 1662 # check inputs 1663 assert scoring in ( 1664 "explained_variance", 1665 "neg_mean_absolute_error", 1666 "neg_mean_squared_error", 1667 "neg_root_mean_squared_error", 1668 "neg_mean_squared_log_error", 1669 "neg_median_absolute_error", 1670 "r2", 1671 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1672 
def plot(self, series=None, type_axis="dates", type_plot="pi"):
    """Plot time series forecast

    Parameters:

    series: {integer} or {string}
        series index or name; when None, the first series (index 0)
        is plotted

    type_axis: {string}
        "dates": x axis built from `input_dates` and `output_dates_`;
        "numeric": x axis is the position of each observation

    type_plot: {string}
        "pi": forecast with a shaded band between `lower_` and `upper_`;
        "spaghetti": one line per simulated path stored in `sims_`.
        Any other value draws nothing.

    Raises:

    AssertionError: no forecast available yet, or invalid `series`

    ValueError: `type_axis` is neither "dates" nor "numeric"

    """

    assert all(
        [
            self.mean_ is not None,
            self.lower_ is not None,
            self.upper_ is not None,
            self.output_dates_ is not None,
        ]
    ), "model forecasting must be obtained first (with predict)"

    # Fail early with a clear message instead of hitting an unbound
    # `x_all` / `x_test` further down.
    if type_axis not in ("dates", "numeric"):
        raise ValueError(
            f"'type_axis' must be 'dates' or 'numeric', got {type_axis!r}"
        )

    if series is None:
        series = 0

    if isinstance(series, str):
        assert (
            series in self.series_names
        ), f"series {series} doesn't exist in the input dataset"
        series_idx = self.df_.columns.get_loc(series)
    else:
        # NumPy integers are accepted alongside built-in ints.
        assert isinstance(series, (int, np.integer)) and (
            0 <= series < self.n_series
        ), f"check series index (< {self.n_series})"
        series_idx = int(series)

    y_test = list(self.mean_.iloc[:, series_idx])
    y_all = list(self.df_.iloc[:, series_idx]) + y_test
    n_points_train = self.df_.shape[0]
    n_points_all = len(y_all)

    if type_axis == "numeric":
        x_all = list(range(n_points_all))
        x_test = list(range(n_points_train, n_points_all))
    else:  # "dates"
        x_all = np.concatenate(
            (self.input_dates.values, self.output_dates_.values), axis=None
        )
        x_test = self.output_dates_.values

    # The series is named in the title only for multivariate input.
    subject = f"{series}" if self.n_series > 1 else "input time series"
    title_style = dict(loc="left", fontsize=12, fontweight=0, color="black")

    if type_plot == "pi":
        fig, ax = plt.subplots()
        ax.plot(x_all, y_all, "-")
        ax.plot(x_test, y_test, "-", color="orange")
        ax.fill_between(
            x_test,
            self.lower_.iloc[:, series_idx],
            self.upper_.iloc[:, series_idx],
            alpha=0.2,
            color="orange",
        )
        if self.replications is None:
            title = f"prediction intervals for {subject}"
        else:
            title = (
                f"prediction intervals for {self.replications} "
                f"simulations of {subject}"
            )
        plt.title(title, **title_style)
        plt.show()

    if type_plot == "spaghetti":
        palette = plt.get_cmap("Set1")
        sims_ix = getsims(self.sims_, series_idx)
        plt.plot(x_all, y_all, "-")
        # One line per simulation; avoid this with thousands of them.
        for col_ix in range(sims_ix.shape[1]):
            plt.plot(
                x_test,
                sims_ix[:, col_ix],
                "-",
                color=palette(col_ix),
                linewidth=1,
                alpha=0.9,
            )
        # Redraw history and point forecast on top of the simulations.
        plt.plot(x_all, y_all, "-", color="black")
        plt.plot(x_test, y_test, "-", color="blue")
        plt.title(
            f"{self.replications} simulations of {subject}", **title_style
        )
        plt.xlabel("Time")
        plt.ylabel("Values")
        plt.show()
1876 if True, the training window's size is fixed, and the window is 1877 rolling forward 1878 1879 show_progress: boolean 1880 if True, a progress bar is printed 1881 1882 level: int 1883 confidence level for prediction intervals 1884 1885 alpha: float 1886 quantile level for pinball loss if scoring='pinball' 1887 0 < alpha < 1 1888 1889 **kwargs: dict 1890 additional parameters to be passed to `fit` and `predict` 1891 1892 Returns: 1893 1894 A tuple: descriptive statistics or errors and raw errors 1895 1896 """ 1897 tscv = TimeSeriesSplit() 1898 1899 tscv_obj = tscv.split( 1900 X, 1901 initial_window=initial_window, 1902 horizon=horizon, 1903 fixed_window=fixed_window, 1904 ) 1905 1906 if isinstance(scoring, str): 1907 assert scoring in ( 1908 "pinball", 1909 "crps", 1910 "root_mean_squared_error", 1911 "mean_squared_error", 1912 "mean_error", 1913 "mean_absolute_error", 1914 "mean_percentage_error", 1915 "mean_absolute_percentage_error", 1916 "winkler_score", 1917 "coverage", 1918 ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1919 1920 def err_func(X_test, X_pred, scoring, alpha=0.5): 1921 if (self.replications is not None) or ( 1922 self.type_pi == "gaussian" 1923 ): # probabilistic 1924 if scoring == "pinball": 1925 # Predict requested quantile 1926 q_pred = self.predict( 1927 h=len(X_test), quantiles=[alpha], **kwargs 1928 ) 1929 # Handle multivariate 1930 scores = [] 1931 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1932 for j in range(p): 1933 series_name = getattr( 1934 self, "series_names", [f"Series_{j}"] 1935 )[j] 1936 q_label = ( 1937 f"{int(alpha * 100):02d}" 1938 if (alpha * 100).is_integer() 1939 else f"{alpha:.3f}".replace(".", "_") 1940 ) 1941 col = f"quantile_{q_label}_{series_name}" 1942 if col not in q_pred.columns: 1943 raise ValueError( 1944 f"Column 
'{col}' not found in quantile forecast output." 1945 ) 1946 try: 1947 y_true_j = X_test[:, j] if p > 1 else X_test 1948 except: 1949 y_true_j = ( 1950 X_test.iloc[:, j] 1951 if p > 1 1952 else X_test.values 1953 ) 1954 y_pred_j = q_pred[col].values 1955 # Compute pinball loss for this series 1956 loss = mean_pinball_loss( 1957 y_true_j, y_pred_j, alpha=alpha 1958 ) 1959 scores.append(loss) 1960 # Return average over series 1961 return np.mean(scores) 1962 elif scoring == "crps": 1963 # Ensure simulations exist 1964 _ = self.predict( 1965 h=len(X_test), **kwargs 1966 ) # triggers self.sims_ 1967 # Extract simulations: list of DataFrames → (R, h, p) 1968 sims_vals = np.stack( 1969 [sim.values for sim in self.sims_], axis=0 1970 ) # (R, h, p) 1971 crps_scores = [] 1972 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1973 for j in range(p): 1974 try: 1975 y_true_j = X_test[:, j] if p > 1 else X_test 1976 except Exception as e: 1977 y_true_j = ( 1978 X_test.iloc[:, j] 1979 if p > 1 1980 else X_test.values 1981 ) 1982 sims_j = sims_vals[:, :, j] # (R, h) 1983 crps_j = self._crps_ensemble( 1984 np.asarray(y_true_j), sims_j 1985 ) 1986 crps_scores.append( 1987 np.mean(crps_j) 1988 ) # average over horizon 1989 return np.mean(crps_scores) # average over series 1990 if scoring == "winkler_score": 1991 return winkler_score(X_pred, X_test, level=level) 1992 elif scoring == "coverage": 1993 return coverage(X_pred, X_test, level=level) 1994 else: 1995 return mean_errors( 1996 pred=X_pred.mean, actual=X_test, scoring=scoring 1997 ) 1998 else: # not probabilistic 1999 return mean_errors( 2000 pred=X_pred, actual=X_test, scoring=scoring 2001 ) 2002 2003 else: # isinstance(scoring, str) = False 2004 err_func = scoring 2005 2006 errors = [] 2007 2008 train_indices = [] 2009 2010 test_indices = [] 2011 2012 for train_index, test_index in tscv_obj: 2013 train_indices.append(train_index) 2014 test_indices.append(test_index) 2015 2016 if show_progress is True: 2017 iterator = tqdm( 
2018 zip(train_indices, test_indices), total=len(train_indices) 2019 ) 2020 else: 2021 iterator = zip(train_indices, test_indices) 2022 2023 for train_index, test_index in iterator: 2024 if verbose == 1: 2025 print(f"TRAIN: {train_index}") 2026 print(f"TEST: {test_index}") 2027 2028 if isinstance(X, pd.DataFrame): 2029 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 2030 X_test = X.iloc[test_index, :] 2031 else: 2032 self.fit(X[train_index, :], xreg=xreg, **kwargs) 2033 X_test = X[test_index, :] 2034 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 2035 2036 errors.append(err_func(X_test, X_pred, scoring, alpha=alpha)) 2037 2038 res = np.asarray(errors) 2039 2040 return res, describe(res) 2041 2042 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 2043 """Compute information criterion using existing residuals 2044 2045 Parameters 2046 ---------- 2047 curr_lags : int 2048 Current number of lags being evaluated 2049 criterion : str 2050 One of 'AIC', 'AICc', or 'BIC' 2051 2052 Returns 2053 ------- 2054 float 2055 Information criterion value or inf if parameters exceed observations 2056 """ 2057 # Get dimensions 2058 n_obs = self.residuals_.shape[0] 2059 n_features = int(self.init_n_series_ * curr_lags) 2060 n_hidden = int(self.n_hidden_features) 2061 # Calculate number of parameters 2062 term1 = int(n_features * n_hidden) 2063 term2 = int(n_hidden * self.init_n_series_) 2064 n_params = term1 + term2 2065 # Check if we have enough observations for the number of parameters 2066 if n_obs <= n_params + 1: 2067 return float("inf") # Return infinity if too many parameters 2068 # Compute RSS using existing residuals 2069 rss = np.sum(self.residuals_**2) 2070 # Compute criterion 2071 if criterion == "AIC": 2072 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 2073 elif criterion == "AICc": 2074 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 2075 n_obs / (n_obs - n_params - 1) 2076 ) 2077 else: # BIC 2078 ic = n_obs * 
np.log(rss / n_obs) + n_params * np.log(n_obs) 2079 2080 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "quantile": use model-agnostic quantile regression under the hood
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
level: int.
level of confidence for `type_pi == 'quantile'` (default is `95`)
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
def fit(self, X, xreg=None, **kwargs):
    """Fit MTS model to training data X, with optional regressors xreg

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        xreg: {array-like}, shape = [n_samples, n_features_xreg]
            Additional (external) regressors to be passed to self.obj;
            xreg must be in 'increasing' order (most recent observations last)

        **kwargs: additional parameters forwarded to
            `sklearn.neighbors.KernelDensity` when residuals are modelled
            by kernel density estimation (`replications` is set and
            `type_pi` contains 'kde')

    Returns:

        self: object
    """
    try:
        self.init_n_series_ = X.shape[1]
    except IndexError:
        # 1-D input: a single (univariate) series
        self.init_n_series_ = 1

    # Automatic lag selection if requested (self.lags is 'AIC', 'AICc' or 'BIC')
    if isinstance(self.lags, str):
        max_lags = min(25, X.shape[0] // 4)
        best_ic = float("inf")
        best_lags = 1

        if self.verbose:
            print(f"\nSelecting optimal number of lags using {self.lags}...")
            iterator = tqdm(range(1, max_lags + 1))
        else:
            iterator = range(1, max_lags + 1)

        # The reversed values do not depend on the candidate lag, so they
        # are computed once rather than on every iteration.
        if isinstance(X, pd.DataFrame):
            X_values = X.values[::-1]
        else:
            X_values = X[::-1]

        for lag in iterator:
            # Try current lag value
            if self.init_n_series_ > 1:
                mts_input = ts.create_train_inputs(X_values, lag)
            else:
                mts_input = ts.create_train_inputs(
                    X_values.reshape(-1, 1), lag
                )

            # Cook training set and fit model
            dummy_y, scaled_Z = self.cook_training_set(
                y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
            )
            residuals_ = []

            for i in range(self.init_n_series_):
                y_mean = np.mean(mts_input[0][:, i])
                centered_y_i = mts_input[0][:, i] - y_mean
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                residuals_.append(
                    (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                )

            # _compute_information_criterion reads self.residuals_
            self.residuals_ = np.asarray(residuals_).T
            ic = self._compute_information_criterion(
                curr_lags=lag, criterion=self.lags
            )

            if self.verbose:
                print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

            if ic < best_ic:
                best_ic = ic
                best_lags = lag

        if self.verbose:
            print(
                f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
            )

        # From here on self.lags is the selected integer
        self.lags = best_lags

    self.input_dates = None
    self.df_ = None

    if isinstance(X, pd.DataFrame) is False:
        # input data set is a numpy array
        if xreg is None:
            X = pd.DataFrame(X)
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            # xreg is not None
            # NOTE(review): self.series_names is not assigned on this path,
            # so it keeps whatever value it had before this call -- confirm
            # that this is intended.
            X = mo.cbind(X, xreg)
            self.xreg_ = xreg

    else:  # input data set is a DataFrame with column names
        X_index = None
        if X.index is not None:
            X_index = X.index
        if xreg is None:
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
        else:
            X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
            self.xreg_ = xreg
        if X_index is not None:
            X.index = X_index
        self.series_names = X.columns.tolist()

    if isinstance(X, pd.DataFrame):
        if self.df_ is None:
            self.df_ = X
            X = X.values
        else:
            input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
            frequency = pd.infer_freq(input_dates_prev)
            self.df_ = pd.concat([self.df_, X], axis=0)
            self.input_dates = pd.date_range(
                start=input_dates_prev[0],
                periods=len(input_dates_prev) + X.shape[0],
                freq=frequency,
            ).values.tolist()
            self.df_.index = self.input_dates
            X = self.df_.values
        self.df_.columns = self.series_names
    else:
        if self.df_ is None:
            self.df_ = pd.DataFrame(X, columns=self.series_names)
        else:
            self.df_ = pd.concat(
                [self.df_, pd.DataFrame(X, columns=self.series_names)],
                axis=0,
            )

    self.input_dates = ts.compute_input_dates(self.df_)

    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series: the shape tuple cannot be unpacked in two
        n = X.shape[0]
        p = 1
    self.n_obs_ = n

    rep_1_n = np.repeat(1, n)

    # Reset every fitted attribute before refitting
    self.y_ = None
    self.X_ = None
    self.n_series = p
    self.fit_objs_.clear()
    self.y_means_.clear()
    residuals_ = []
    self.residuals_ = None
    self.residuals_sims_ = None
    self.kde_ = None
    self.sims_ = None
    self.scaled_Z_ = None
    self.centered_y_is_ = []

    if self.init_n_series_ > 1:
        # multivariate time series
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
    else:
        # univariate time series
        mts_input = ts.create_train_inputs(
            X.reshape(-1, 1)[::-1], self.lags
        )

    self.y_ = mts_input[0]

    self.X_ = mts_input[1]

    dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

    self.scaled_Z_ = scaled_Z

    # loop on all the time series and adjust self.obj.fit
    if self.verbose > 0:
        print(
            f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
        )

    if self.show_progress is True:
        iterator = tqdm(range(self.init_n_series_))
    else:
        iterator = range(self.init_n_series_)

    if self.type_pi in (
        "gaussian",
        "kde",
        "bootstrap",
        "block-bootstrap",
    ) or self.type_pi.startswith("vine"):
        # One model per series, fitted on the centered response;
        # in-sample residuals are kept for the prediction intervals.
        for i in iterator:
            y_mean = np.mean(self.y_[:, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[:, i] - y_mean
            self.centered_y_is_.append(centered_y_i)
            self.obj.fit(X=scaled_Z, y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)
            residuals_.append(
                (
                    centered_y_i - self.fit_objs_[i].predict(scaled_Z)
                ).tolist()
            )

    if self.type_pi == "quantile":
        # Same per-series fit, but no residuals are stored
        for i in iterator:
            y_mean = np.mean(self.y_[:, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[:, i] - y_mean
            self.centered_y_is_.append(centered_y_i)
            self.obj.fit(X=scaled_Z, y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)

    if self.type_pi.startswith("scp"):
        # split conformal prediction; the split does not depend on the
        # series, so the index ranges are built once
        n_y = self.y_.shape[0]
        n_y_half = n_y // 2
        first_half_idx = range(0, n_y_half)
        second_half_idx = range(n_y_half, n_y)
        for i in iterator:
            y_mean_temp = np.mean(self.y_[first_half_idx, i])
            centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
            self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
            # calibrated residuals actually
            residuals_.append(
                (
                    self.y_[second_half_idx, i]
                    - (
                        y_mean_temp
                        + self.obj.predict(scaled_Z[second_half_idx, :])
                    )
                ).tolist()
            )
            # fit on the second half
            y_mean = np.mean(self.y_[second_half_idx, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[second_half_idx, i] - y_mean
            self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)

    self.residuals_ = np.asarray(residuals_).T

    if self.type_pi == "gaussian":
        self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

    if self.type_pi.startswith("scp2"):
        # Calculate mean and standard deviation for each column
        data_mean = np.mean(self.residuals_, axis=0)
        self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
        # Center and scale the array using broadcasting
        self.residuals_ = (
            self.residuals_ - data_mean[np.newaxis, :]
        ) / self.residuals_std_dev_[np.newaxis, :]

    if self.replications is not None and "kde" in self.type_pi:
        if self.verbose > 0:
            print(f"\n Simulate residuals using {self.kernel} kernel... \n")
        assert self.kernel in (
            "gaussian",
            "tophat",
        ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
        # Bandwidth chosen by cross-validated grid search
        kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
        grid = GridSearchCV(
            KernelDensity(kernel=self.kernel, **kwargs),
            param_grid=kernel_bandwidths,
        )
        grid.fit(self.residuals_)

        if self.verbose > 0:
            print(
                f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
            )

        self.kde_ = grid.best_estimator_

    return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj; xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed to the kernel density estimator, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
def predict(self, h=5, level=95, quantiles=None, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: int
            Forecasting horizon

        level: int, or list / numpy array of int
            Level of confidence for prediction intervals. When a list or
            array is given, one forecast is computed per level and the
            bounds are returned side by side in a data frame with columns
            `lower_{level}_{series}` and `upper_{level}_{series}`

        quantiles: array-like of float
            Quantile levels strictly between 0 and 1. When given, the call
            is delegated to `self._predict_quantiles`

        **kwargs: additional parameters.
            The mere presence of the keys `return_std` or `return_pi`
            switches on interval computation by the base learners;
            `xreg` (indexed with `.iloc`) supplies future values of the
            external regressors. All of kwargs are also forwarded to
            `self.cook_test_set`

    Returns:

        A data frame of point forecasts when no interval is requested and
        `replications` is None; otherwise a namedtuple `DescribeResult`
        with fields `mean`, `lower`, `upper` (plus `sims` for
        simulation-based intervals)

    Raises:

        ValueError: if any entry of `quantiles` is outside (0, 1)
    """

    def _replication_indices():
        # Progress bar only at verbose == 1; every other verbosity level
        # iterates silently (previously such levels skipped the simulation
        # altogether and left stale or missing simulations behind).
        reps = range(self.replications)
        return tqdm(reps) if self.verbose == 1 else reps

    def _drop_xreg_columns(res):
        # Strip the trailing external-regressor columns from every field
        # of the result tuple; a no-op when the model has no xreg.
        if self.xreg_ is None:
            return res
        n_xreg = self.xreg_.shape[1] if len(self.xreg_.shape) > 1 else 1
        res2 = mx.tuple_map(
            res,
            lambda x: mo.delete_last_columns(x, num_columns=n_xreg),
        )
        return type(res)(*(res2[k] for k in range(len(res))))

    if quantiles is not None:
        # Validate
        quantiles = np.asarray(quantiles)
        if not ((quantiles > 0) & (quantiles < 1)).all():
            raise ValueError("quantiles must be between 0 and 1.")
        # Delegate to dedicated method
        return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

    if isinstance(level, (list, np.ndarray)):
        # Store results
        result_dict = {}
        # One forecast per requested level; bounds are flattened into columns
        for lev in level:
            res = self.predict(h=h, level=lev, **kwargs)
            # Adjust index and collect lower/upper bounds
            res.lower.index = pd.to_datetime(res.lower.index)
            res.upper.index = pd.to_datetime(res.upper.index)
            if isinstance(res.lower, pd.DataFrame):
                for series in res.lower.columns:
                    result_dict[f"lower_{lev}_{series}"] = (
                        res.lower[series].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series}"] = (
                        res.upper[series].to_numpy().flatten()
                    )
            else:
                for series_id in range(self.n_series):
                    result_dict[f"lower_{lev}_{series_id}"] = (
                        res.lower[series_id, :].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series_id}"] = (
                        res.upper[series_id, :].to_numpy().flatten()
                    )
        return pd.DataFrame(result_dict, index=self.output_dates_)

    # only one prediction interval
    self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)

    self.level_ = level

    self.return_std_ = False  # do not remove (/!\)

    self.mean_ = None  # do not remove (/!\)

    self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    y_means_ = np.asarray(
        [self.y_means_[i] for i in range(self.init_n_series_)]
    )

    self.alpha_ = 100 - level

    # Two-sided Gaussian multiplier for the requested level
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    if "return_std" in kwargs:  # bayesian forecasting
        self.return_std_ = True
        self.preds_std_ = []

    if "return_pi" in kwargs:  # split conformal, without simulation
        mean_pi_ = []
        lower_pi_ = []
        upper_pi_ = []

    # ---- simulate residuals, depending on the type of prediction interval
    if self.kde_ is not None and "kde" in self.type_pi:  # kde
        target_cols = self.df_.columns[
            : self.init_n_series_
        ]  # Get target column names
        raw_sims = tuple(
            self.kde_.sample(
                n_samples=h, random_state=self.seed + 100 * i
            )  # Keep full sample
            for i in _replication_indices()
        )
        # Convert to DataFrames after sampling
        self.residuals_sims_ = tuple(
            pd.DataFrame(
                sim,  # Keep all columns
                columns=target_cols,  # Use original target column names
                index=self.output_dates_,
            )
            for sim in raw_sims
        )

    if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=None,
                seed=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    if self.type_pi in (
        "block-bootstrap",
        "scp-block-bootstrap",
        "scp2-block-bootstrap",
    ):
        if self.block_size is None:
            self.block_size = int(
                np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
            )

        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=self.block_size,
                seed=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    if "vine" in self.type_pi:
        self.residuals_sims_ = tuple(
            vinecopula_sample(
                x=self.residuals_,
                n_samples=h,
                method=self.type_pi,
                random_state=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    # ---- recursive forecasting: each prediction feeds the next step
    mean_ = deepcopy(self.mean_)

    for step in range(h):
        new_obs = ts.reformat_response(mean_, self.lags)
        new_X = new_obs.reshape(1, -1)
        cooked_new_X = self.cook_test_set(new_X, **kwargs)

        if "return_std" in kwargs:
            self.preds_std_.append(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(
                            cooked_new_X, return_std=True
                        )[1]
                    ).item()
                    for j in range(self.n_series)
                ]
            )

        if "return_pi" in kwargs:
            # The series index must not reuse the horizon index: `step`
            # selects the xreg row further down.
            for j in range(self.n_series):
                preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
                mean_pi_.append(preds_pi.mean[0])
                lower_pi_.append(preds_pi.lower[0])
                upper_pi_.append(preds_pi.upper[0])

        if self.type_pi != "quantile":
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(cooked_new_X)
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )
        else:
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j]
                        .predict(cooked_new_X, return_pi=True)
                        .upper
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )

        # Models were fitted on centered responses: add the means back
        preds = np.asarray(y_means_ + predicted_cooked_new_X)

        # Create full row with both predictions and external regressors
        if self.xreg_ is not None and "xreg" in kwargs:
            next_xreg = kwargs["xreg"].iloc[step: step + 1].values.flatten()
            full_row = np.concatenate([preds, next_xreg])
        else:
            full_row = preds

        # Create a new row with same number of columns as mean_
        new_row = np.zeros((1, mean_.shape[1]))
        new_row[0, : full_row.shape[0]] = full_row

        # Most recent observation first: push the new row on top and
        # drop the oldest one
        mean_ = np.vstack([new_row, mean_[:-1]])

    # Final output should only include the target columns
    self.mean_ = pd.DataFrame(
        mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
            ::-1
        ],
        columns=self.df_.columns[: self.init_n_series_],
        index=self.output_dates_,
    )

    # function's return ----------------------------------------------------------------------
    if (
        (("return_std" not in kwargs) and ("return_pi" not in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or ("vine" in self.type_pi):
        if self.replications is None:
            return self.mean_.iloc[:, : self.init_n_series_]

        # if "return_std" not in kwargs and self.replications is not None
        meanf = []
        medianf = []
        lower = []
        upper = []

        if "scp2" in self.type_pi:
            # residuals were standardized in fit: scale them back
            self.sims_ = tuple(
                self.mean_
                + self.residuals_sims_[i]
                * self.residuals_std_dev_[np.newaxis, :]
                for i in _replication_indices()
            )
        else:
            self.sims_ = tuple(
                self.mean_ + self.residuals_sims_[i]
                for i in _replication_indices()
            )

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        for ix in range(self.init_n_series_):
            sims_ix = getsims(self.sims_, ix)
            if self.agg == "mean":
                meanf.append(np.mean(sims_ix, axis=1))
            else:
                medianf.append(np.median(sims_ix, axis=1))
            lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
            upper.append(
                np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
            )

        target_names = self.series_names[: self.init_n_series_]

        # Only the aggregate that was actually computed is turned into a
        # data frame; building one from the empty list raises.
        if self.agg == "mean":
            self.mean_ = pd.DataFrame(
                np.asarray(meanf).T,
                columns=target_names,
                index=self.output_dates_,
            )
        else:
            self.median_ = pd.DataFrame(
                np.asarray(medianf).T,
                columns=target_names,
                index=self.output_dates_,
            )

        self.lower_ = pd.DataFrame(
            np.asarray(lower).T,
            columns=target_names,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            np.asarray(upper).T,
            columns=target_names,
            index=self.output_dates_,
        )

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )

    if (
        (("return_std" in kwargs) or ("return_pi" in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or "vine" in self.type_pi:
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        if "return_std" in kwargs:
            self.preds_std_ = np.asarray(self.preds_std_)

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * self.preds_std_,
                columns=self.series_names,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * self.preds_std_,
                columns=self.series_names,
                index=self.output_dates_,
            )

        if "return_pi" in kwargs:
            self.lower_ = pd.DataFrame(
                np.asarray(lower_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,
                index=self.output_dates_,
            )

        return _drop_xreg_columns(
            DescribeResult(self.mean_, self.lower_, self.upper_)
        )

    if self.type_pi == "gaussian":
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        # Use Bayesian std if available, otherwise use gaussian residual std
        if "return_std" in kwargs and len(self.preds_std_) > 0:
            preds_std_to_use = np.asarray(self.preds_std_)
        else:
            preds_std_to_use = self.gaussian_preds_std_

        self.lower_ = pd.DataFrame(
            self.mean_.values - pi_multiplier * preds_std_to_use,
            columns=self.series_names,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            self.mean_.values + pi_multiplier * preds_std_to_use,
            columns=self.series_names,
            index=self.output_dates_,
        )

        return _drop_xreg_columns(
            DescribeResult(self.mean_, self.lower_, self.upper_)
        )

    if self.type_pi == "quantile":
        DescribeResult = namedtuple("DescribeResult", ("mean"))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        return _drop_xreg_columns(DescribeResult(self.mean_))

    # After prediction loop, ensure sims only contain target columns
    if self.sims_ is not None:
        if self.verbose == 1:
            self.sims_ = tuple(
                sim[:h,]  # Only keep target columns and h rows
                for sim in tqdm(self.sims_)
            )
        elif self.verbose == 0:
            self.sims_ = tuple(
                sim[:h,]  # Only keep target columns and h rows
                for sim in self.sims_
            )

        # Convert numpy arrays to DataFrames with proper columns
        self.sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=self.df_.columns[: self.init_n_series_],
                index=self.output_dates_,
            )
            for sim in self.sims_
        )

    if self.type_pi in (
        "kde",
        "bootstrap",
        "block-bootstrap",
        "vine-copula",
    ):
        if self.xreg_ is not None:
            # Use getsimsxreg when external regressors are present
            target_cols = self.df_.columns[: self.init_n_series_]
            self.sims_ = getsimsxreg(
                self.sims_, self.output_dates_, target_cols
            )
        else:
            # Use original getsims for backward compatibility
            self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
def score(
    self,
    X,
    training_index,
    testing_index,
    scoring=None,
    alpha=0.5,
    **kwargs,
):
    """Train on training_index, score on testing_index.

    Parameters
    ----------
    X : numpy array, shape (n,) or (n, p)
        Univariate or multivariate time series (rows are time steps).
    training_index, testing_index : sequence of int
        Row positions used for fitting and for evaluation; they must not
        overlap.
    scoring : str, optional
        One of 'explained_variance', 'neg_mean_absolute_error',
        'neg_mean_squared_error', 'neg_root_mean_squared_error' (default),
        'neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2',
        'pinball' or 'crps'.
    alpha : float
        Quantile level, only used when ``scoring == "pinball"``.
    **kwargs
        Forwarded to both ``self.fit`` and ``self.predict``.

    Returns
    -------
    float
        The score on the testing window. No sign flip is applied here: the
        'neg_*' names map to the plain error functions listed in
        ``scoring_options`` below.

    Raises
    ------
    AssertionError
        If the index sets overlap, the windows are longer than the series,
        or ``scoring`` is unknown.
    ValueError
        If the expected quantile column is missing ('pinball') or no
        simulations are available after prediction ('crps').
    """

    assert set(training_index).isdisjoint(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    # Validate before the (potentially expensive) fit so that a typo in
    # `scoring` fails immediately.
    point_scorings = (
        "explained_variance",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
        "neg_mean_squared_log_error",
        "neg_median_absolute_error",
        "r2",
    )
    assert scoring in point_scorings + ("pinball", "crps"), (
        "'scoring' should be in ('explained_variance', "
        "'neg_mean_absolute_error', 'neg_mean_squared_error', "
        "'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
        "'neg_median_absolute_error', 'r2', 'pinball', 'crps')"
    )

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series: shape has a single entry
        n = X.shape[0]
        p = 1

    # Training and testing sets
    if p > 1:
        X_train = X[training_index, :]
        X_test = X[testing_index, :]
    else:
        X_train = X[training_index]
        X_test = X[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    self.fit(X_train, **kwargs)

    if scoring == "pinball":
        q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)

        # Column-wise view that also works for a 1-D (univariate) series.
        y_test = np.asarray(X_test).reshape(h, -1)

        # The label only depends on alpha, so build it once.
        pct = float(alpha * 100)
        q_label = (
            f"{int(pct):02d}"
            if pct.is_integer()
            else f"{alpha:.3f}".replace(".", "_")
        )

        series_names = getattr(self, "series_names", None)
        if series_names is None:
            series_names = [f"Series_{j}" for j in range(p)]

        scores = []
        for j in range(p):
            col = f"quantile_{q_label}_{series_names[j]}"
            if col not in q_pred.columns:
                raise ValueError(
                    f"Column '{col}' not found in quantile forecast output."
                )
            scores.append(
                mean_pinball_loss(
                    y_test[:, j], q_pred[col].values, alpha=alpha
                )
            )
        # Average over series
        return np.mean(scores)

    if scoring == "crps":
        # A single predict call is enough to populate self.sims_.
        self.predict(h=h, **kwargs)
        sims = getattr(self, "sims_", None)
        if sims is None:
            raise ValueError(
                "scoring='crps' requires simulated forecasts, but no "
                "simulations are available after predict()"
            )

        y_test = np.asarray(X_test).reshape(h, -1)
        # Stack the simulated paths into one array indexed
        # (simulation, horizon, series).
        sims_vals = np.stack([sim.values for sim in sims], axis=0)

        crps_scores = [
            # average over horizon
            np.mean(
                self._crps_ensemble(
                    np.asarray(y_test[:, j]), sims_vals[:, :, j]
                )
            )
            for j in range(p)
        ]
        return np.mean(crps_scores)  # average over series

    # Point-forecast metrics
    preds = self.predict(h=h, **kwargs)

    scoring_options = {
        "explained_variance": skm2.explained_variance_score,
        "neg_mean_absolute_error": skm2.mean_absolute_error,
        "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
        "neg_root_mean_squared_error": lambda x, y: np.sqrt(
            np.mean((x - y) ** 2)
        ),
        "neg_mean_squared_log_error": skm2.mean_squared_log_error,
        "neg_median_absolute_error": skm2.median_absolute_error,
        "r2": skm2.r2_score,
    }

    return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
class MTSStacker:
    """
    Sequential stacking for time series with unified strategy.

    Core Strategy:
    1. Split data: half1 (base models) | half2 (meta-model)
    2. Train base models on half1, predict half2
    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
    4. Train meta-MTS on half2 with augmented data (via multivariate or xreg)
    5. Retrain base models on half2 for temporal alignment
    6. At prediction: base models forecast → augment → meta-model predicts

    Two modes of augmentation:
    - multivariate: Stack as additional time series, extract target series
    - xreg: Pass base predictions as external regressors
    """

    def __init__(
        self,
        base_models,
        meta_model_mts,
        split_ratio=0.6,
        mode="multivariate",
        target_series=0,
    ):
        """
        Parameters
        ----------
        base_models : list of sklearn-compatible models
            Base models (e.g., Ridge, Lasso, RandomForest)
        meta_model_mts : nnetsauce.MTS instance
            MTS with type_pi='scp2-kde' or similar
        split_ratio : float
            Proportion of the observations assigned to half1 (default: 0.6)
        mode : str
            'multivariate': stack as additional series
            'xreg': pass as external regressors
        target_series : int
            Which series to extract in multivariate mode (default: 0, the original)
        """
        assert 0 < split_ratio < 1, "split_ratio must be between 0 and 1"
        assert mode in (
            "multivariate",
            "xreg",
        ), "mode must be 'multivariate' or 'xreg'"

        self.base_models = base_models
        self.meta_model = meta_model_mts
        self.split_ratio = split_ratio
        self.mode = mode
        self.target_series = target_series
        self.fitted_base_models_ = []
        self.split_idx_ = None

    def _train_base_models(self, y_train):
        """Train base models and return fitted MTS objects.

        Each base model is cloned and wrapped in an MTS that reuses the
        meta-model's `lags` and uses type_pi="gaussian".
        """
        # Imported lazily, as in the original, to avoid a circular import
        # at package load time (presumably -- confirm).
        from nnetsauce import MTS

        fitted_models = []
        n_models = len(self.base_models)

        for i, base_model in enumerate(self.base_models):
            print(
                f"Training base model {i+1}/{n_models}: {type(base_model).__name__}"
            )

            base_mts = MTS(
                obj=clone(base_model),
                lags=self.meta_model.lags,
                type_pi="gaussian",
                verbose=0,
                show_progress=False,
            )

            base_mts.fit(y_train)
            fitted_models.append(base_mts)

        return fitted_models

    def _get_base_predictions(self, fitted_models, h):
        """Generate h-step point forecasts (as arrays) from fitted base models."""
        predictions = []

        for fitted_base_mts in fitted_models:
            print(f"fitted_base_mts: {fitted_base_mts}")
            preds = fitted_base_mts.predict(h=h)

            # DataFrame/Series/ndarray all expose a `.mean` *method*, so a
            # bare hasattr(preds, "mean") cannot tell them apart from a
            # result tuple carrying a `mean` field.
            if isinstance(preds, (pd.DataFrame, pd.Series, np.ndarray)):
                point = preds
            elif hasattr(preds, "mean"):
                point = preds.mean
            else:
                point = preds

            predictions.append(np.asarray(getattr(point, "values", point)))

        return predictions

    def _stack_as_xreg(self, predictions):
        """Column-stack base forecasts into a named external-regressor frame."""
        stacked = np.hstack(predictions)
        n_models = len(self.base_models)

        if stacked.shape[1] == n_models:
            # One column per base model (univariate target): historical names.
            columns = [f"meta_pred{i}" for i in range(n_models)]
        else:
            # Several series per base model: one name per (model, series).
            columns = [
                f"meta_pred{i}_{j}"
                for i, pred in enumerate(predictions)
                for j in range(pred.shape[1])
            ]

        return pd.DataFrame(stacked, columns=columns)

    def fit(self, y, **kwargs):
        """Fit stacking ensemble with sequential split.

        Parameters
        ----------
        y : array with `ndim`/`reshape` (e.g. numpy array), shape (n,) or (n, p)
            Series to model; a 1-D input is treated as a single column.
        **kwargs
            Forwarded to the meta-model's `fit`.

        Returns
        -------
        self
        """

        if y.ndim == 1:
            y = y.reshape(-1, 1)

        n = len(y)
        self.split_idx_ = int(n * self.split_ratio)

        y_half1 = y[: self.split_idx_]
        y_half2 = y[self.split_idx_ :]

        print(f"Half1: {len(y_half1)} obs | Half2: {len(y_half2)} obs")
        print(f"Mode: {self.mode}")

        # Step 1: Train base models on half1
        fitted_base_half1 = self._train_base_models(y_half1)

        # Step 2: Get base predictions on half2
        base_preds_half2 = self._get_base_predictions(
            fitted_base_half1, len(y_half2)
        )

        # Step 3: Train meta-model based on mode
        if self.mode == "xreg":
            xreg_half2 = self._stack_as_xreg(base_preds_half2)
            print(f"\nMeta-training with xreg shape: {xreg_half2.shape}")
            print(f"Correlation matrix:\n{np.corrcoef(xreg_half2.T)}")
            self.meta_model.fit(y_half2, xreg=xreg_half2, **kwargs)

        else:  # multivariate
            multivariate_half2 = np.hstack([y_half2] + base_preds_half2)
            print(f"\nMultivariate shape: {multivariate_half2.shape}")
            print(f"Correlation matrix:\n{np.corrcoef(multivariate_half2.T)}")
            self.meta_model.fit(multivariate_half2, **kwargs)

        # Step 4: Retrain base models on half2 for temporal alignment
        print("\nRetraining base models on half2 for prediction...")
        self.fitted_base_models_ = self._train_base_models(y_half2)

        return self

    def predict(self, h=5, **kwargs):
        """Generate h-step ahead forecasts from the meta-model.

        In 'xreg' mode the base models' forecasts are passed to the
        meta-model as future external regressors and its result is returned
        unchanged. In 'multivariate' mode the meta-model forecasts all
        stacked series and only `target_series` is returned.

        Raises
        ------
        ValueError
            In 'xreg' mode, if no fitted base models are available.
        """

        if self.mode == "xreg":
            if not self.fitted_base_models_:
                raise ValueError(
                    "No fitted base models available: call fit() before predict()"
                )

            # Future xreg = fresh forecasts of the base models
            base_forecasts = self._get_base_predictions(
                self.fitted_base_models_, h
            )
            xreg_future = self._stack_as_xreg(base_forecasts)
            print(f"\nPredicting with xreg shape: {xreg_future.shape}")
            # MTS expects 'xreg' in kwargs, not 'new_xreg'
            return self.meta_model.predict(h=h, xreg=xreg_future, **kwargs)

        # multivariate: the meta-model forecasts every stacked series from
        # its own state, so base forecasts are not needed here.
        forecast_all = self.meta_model.predict(h=h, **kwargs)

        return self._extract_target_series(forecast_all)

    def _extract_target_series(self, forecast_all):
        """Extract target series from multivariate forecast.

        Accepts either a plain DataFrame of point forecasts or a result
        object exposing some of `mean`, `lower`, `upper`, `sims`; missing
        pieces are returned as None.
        """
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper", "sims")
        )

        # List indexing keeps a one-column DataFrame and, unlike the slice
        # idx:idx+1, also works for negative positions.
        cols = [self.target_series]

        if isinstance(forecast_all, pd.DataFrame):
            # Point forecasts only: a DataFrame's `.mean` is a method, not
            # a forecast, so it must not be treated like a result tuple.
            return DescribeResult(
                mean=forecast_all.iloc[:, cols],
                lower=None,
                upper=None,
                sims=None,
            )

        def pick(name):
            frame = getattr(forecast_all, name, None)
            return None if frame is None else frame.iloc[:, cols]

        sims = getattr(forecast_all, "sims", None)
        if sims is not None:
            sims = tuple(sim.iloc[:, cols] for sim in sims)

        return DescribeResult(
            mean=pick("mean"),
            lower=pick("lower"),
            upper=pick("upper"),
            sims=sims,
        )
Sequential stacking for time series with unified strategy.
Core Strategy:
- Split data: half1 (base models) | half2 (meta-model)
- Train base models on half1, predict half2
- Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
- Train meta-MTS on half2 with augmented data (via multivariate or xreg)
- Retrain base models on half2 for temporal alignment
- At prediction: base models forecast → augment → meta-model predicts
Two modes of augmentation:
- multivariate: Stack as additional time series, extract target series
- xreg: Pass base predictions as external regressors
def fit(self, y, **kwargs):
    """Fit stacking ensemble with sequential split.

    Parameters
    ----------
    y : array with `ndim`/`reshape` (e.g. numpy array), shape (n,) or (n, p)
        Series to model; a 1-D input is treated as a single column.
    **kwargs
        Forwarded to the meta-model's `fit`.

    Returns
    -------
    self
    """

    if y.ndim == 1:
        y = y.reshape(-1, 1)

    n = len(y)
    self.split_idx_ = int(n * self.split_ratio)

    y_half1 = y[: self.split_idx_]
    y_half2 = y[self.split_idx_ :]

    print(f"Half1: {len(y_half1)} obs | Half2: {len(y_half2)} obs")
    print(f"Mode: {self.mode}")

    # Step 1: Train base models on half1
    fitted_base_half1 = self._train_base_models(y_half1)

    # Step 2: Get base predictions on half2
    base_preds_half2 = self._get_base_predictions(
        fitted_base_half1, len(y_half2)
    )

    # Step 3: Train meta-model based on mode
    if self.mode == "xreg":
        stacked = np.hstack(base_preds_half2)
        n_models = len(self.base_models)

        if stacked.shape[1] == n_models:
            # One column per base model (univariate target): historical names.
            columns = ["meta_pred" + str(i) for i in range(n_models)]
        else:
            # Several series per base model: naming one column per model
            # would not match the stacked width, so name each
            # (model, series) pair.
            columns = [
                f"meta_pred{i}_{j}"
                for i, pred in enumerate(base_preds_half2)
                for j in range(pred.shape[1])
            ]

        xreg_half2 = pd.DataFrame(stacked, columns=columns)
        print(f"\nMeta-training with xreg shape: {xreg_half2.shape}")
        print(f"Correlation matrix:\n{np.corrcoef(xreg_half2.T)}")
        self.meta_model.fit(y_half2, xreg=xreg_half2, **kwargs)

    else:  # multivariate
        multivariate_half2 = np.hstack([y_half2] + base_preds_half2)
        print(f"\nMultivariate shape: {multivariate_half2.shape}")
        print(f"Correlation matrix:\n{np.corrcoef(multivariate_half2.T)}")
        self.meta_model.fit(multivariate_half2, **kwargs)

    # Step 4: Retrain base models on half2 for temporal alignment
    print("\nRetraining base models on half2 for prediction...")
    self.fitted_base_models_ = self._train_base_models(y_half2)

    return self
Fit stacking ensemble with sequential split.
def predict(self, h=5, **kwargs):
    """Generate h-step ahead forecasts from the meta-model.

    In 'xreg' mode the base models' forecasts are passed to the meta-model
    as future external regressors and its result is returned unchanged. In
    'multivariate' mode the meta-model forecasts all stacked series and only
    the target series is returned. Whether intervals are conformal depends
    on how the meta-model was configured.

    Parameters
    ----------
    h : int
        Forecast horizon.
    **kwargs
        Forwarded to the meta-model's `predict`.

    Raises
    ------
    ValueError
        In 'xreg' mode, if no fitted base models are available.
    """

    if self.mode == "xreg":
        if not self.fitted_base_models_:
            raise ValueError(
                "No fitted base models available: call fit() before predict()"
            )

        # Future xreg = fresh forecasts of the base models
        base_forecasts = self._get_base_predictions(
            self.fitted_base_models_, h
        )
        stacked = np.hstack(base_forecasts)
        n_models = len(self.base_models)

        if stacked.shape[1] == n_models:
            # One column per base model (univariate target): historical names.
            columns = [f"meta_pred{i}" for i in range(n_models)]
        else:
            # Several series per base model: name each (model, series) pair
            # so the column count matches the stacked width.
            columns = [
                f"meta_pred{i}_{j}"
                for i, pred in enumerate(base_forecasts)
                for j in range(pred.shape[1])
            ]

        xreg_future = pd.DataFrame(stacked, columns=columns)
        print(f"\nPredicting with xreg shape: {xreg_future.shape}")
        # MTS expects 'xreg' in kwargs, not 'new_xreg'
        return self.meta_model.predict(h=h, xreg=xreg_future, **kwargs)

    # multivariate: the meta-model forecasts every stacked series from its
    # own state, so computing base forecasts here would be wasted work.
    forecast_all = self.meta_model.predict(h=h, **kwargs)

    # Extract target series
    return self._extract_target_series(forecast_all)
Generate h-step ahead forecasts with conformal intervals.
class MultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        fit_objs_: dict
            fitted regression objects, one per class (keyed by class index)

        n_classes_: int
            number of classes for the classifier

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                  n_clusters=2, type_clust="gmm")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    # NOTE: `_estimator_type` is provided by the property at the bottom of
    # the class; a same-named class attribute here would simply be shadowed.

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit MultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: currently unused by this method (accepted for
                signature compatibility)

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # multitask response: one indicator column per class
        Y = mo.one_hot_encode2(output_y, self.n_classes_)

        # One regression per class; deepcopy because self.obj.fit is called
        # repeatedly on the same underlying object.
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(
                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
            )

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, index of the most probable
            class for each sample

        """
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        The per-class regression outputs are passed through the logistic
        function and normalized so that each row sums to 1.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like},
            shape = [n_samples, n_classes] (one row for a 1-D input)

        """

        shape_X = X.shape

        if len(shape_X) == 1:
            # Single sample: shape_X[0] is the number of FEATURES here, so
            # the output must have exactly one row.
            n_features = shape_X[0]
            probs = np.zeros((1, self.n_classes_))

            # A dummy row of ones is appended before preprocessing and only
            # the first prediction is kept.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

        else:
            probs = np.zeros((shape_X[0], self.n_classes_))
            Z = self.cook_test_set(X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        expit_raw_probs = expit(probs)

        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
fitted regression objects, one per class (keyed by class index)
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit MultitaskClassifier to training data (X, y).

    One copy of the base regressor is fitted per class, each on the
    indicator column of that class.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: not used by this method

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # sklearn-compatible bookkeeping of the observed labels
    labels = np.unique(y)
    self.classes_ = labels
    self.n_classes_ = len(labels)

    # multitask response: one indicator column per class
    indicators = mo.one_hot_encode2(output_y, self.n_classes_)

    for class_idx in range(self.n_classes_):
        fitted = self.obj.fit(scaled_Z, indicators[:, class_idx], **kwargs)
        # deepcopy: the same underlying object is refitted at every turn
        self.fit_objs_[class_idx] = deepcopy(fitted)

    return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.predict_proba

    Returns:

        model predictions: {array-like}, column index of the largest
        probability for each row returned by predict_proba

    """
    class_probs = self.predict_proba(X, **kwargs)
    return np.argmax(class_probs, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    The per-class regression outputs are passed through the logistic
    function and normalized so that each row sums to 1.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single sample.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like},
        shape = [n_samples, n_classes] (one row for a 1-D input)

    """

    shape_X = X.shape

    if len(shape_X) == 1:
        # Single sample: shape_X[0] is the number of FEATURES here, so the
        # output must have exactly one row (not n_features rows).
        n_features = shape_X[0]
        probs = np.zeros((1, self.n_classes_))

        # A dummy row of ones is appended before preprocessing and only the
        # first prediction is kept.
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        Z = self.cook_test_set(new_X, **kwargs)

        # loop on all the classes
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

    else:
        probs = np.zeros((shape_X[0], self.n_classes_))
        Z = self.cook_test_set(X, **kwargs)

        # loop on all the classes
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

    expit_raw_probs = expit(probs)

    return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        weights : list
            The weights of the model.
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Returns False when no weights are set, True when every (W, b) pair
        chains correctly from `input_dim` down to a single output.

        Raises:
            ValueError: on any dimension mismatch or malformed weights.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError, TypeError) as err:
            # Entries that are not (array, array) pairs end up here.
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            ) from err

    def fit(self, X, y):
        """Fit the network by full-batch gradient descent.

        X is standardized with `scaler_`, y is centered on its mean, and the
        parameters are updated `max_iter` times with step `learning_rate`.
        If `weights` are set they are validated and used as the starting
        point; otherwise parameters are initialized from
        `hidden_layer_sizes`.

        Returns:
            self

        Raises:
            ValueError: if the provided weights are inconsistent, or if
                neither usable weights nor `hidden_layer_sizes` is available.
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is a 2D array for consistency (also accepts lists/Series)
        y = jnp.asarray(y).reshape(-1, 1)
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_

        # Validate provided weights, or initialize fresh parameters. Empty
        # weights are treated like "not provided" so params is never left
        # as None before the training loop.
        if self.weights is not None and self._validate_weights(X.shape[1]):
            self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )

        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads

        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples. jax.tree_util.tree_map is
            # the stable spelling; the top-level jax.tree_map alias has been
            # removed from recent JAX releases.
            grads = jax.tree_util.tree_map(
                lambda g: jnp.mean(g, axis=0), grads
            )
            # Update parameters
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]

        # Store final weights
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model."""
        if self.weights is None:
            raise ValueError(
                "No weights available. Model has not been fitted yet."
            )
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually.

        Only `weights` and `params` are updated; `scaler_` and `y_mean_`
        are left untouched.
        """
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict targets for X (standardized with the fitted scaler).

        Raises:
            ValueError: if the model has no parameters yet.
        """
        # Check first so an unfitted model reports the intended message
        # instead of failing inside the scaler.
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        X = self.scaler_.transform(X)
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        # Undo the centering applied to y in fit
        return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the network by full-batch gradient descent.

    Parameters:

        X : array-like, shape = [n_samples, n_features]
            Training inputs; standardized with `self.scaler_`.
        y : array-like, shape = [n_samples]
            Target values; centered by their mean before training.

    Returns:

        self

    Raises:

        ValueError
            If the provided weights are inconsistent with X, or if
            neither usable weights nor `hidden_layer_sizes` are available.
    """
    # Standardize the input features
    X = self.scaler_.fit_transform(X)
    # Ensure y is a 2D array (also accepts lists / pandas Series)
    y = jnp.asarray(y).reshape(-1, 1)
    self.y_mean_ = jnp.mean(y)
    y = y - self.y_mean_
    # Validate provided weights; empty or missing weights fall back to a
    # fresh initialization instead of leaving `params` unset
    if self.weights is not None and self._validate_weights(X.shape[1]):
        self.params = self.weights
    else:
        if self.hidden_layer_sizes is None:
            raise ValueError(
                "Either weights or hidden_layer_sizes must be provided"
            )
        self.params = initialize_params(
            X.shape[1], self.hidden_layer_sizes, self.random_state
        )
    loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
    grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
    perex_grads = jit(
        vmap(grad_loss, in_axes=(None, 0, 0))
    )  # fast per-example grads
    # Training loop
    for _ in range(self.max_iter):
        grads = perex_grads(self.params, X, y)
        # Average gradients across examples
        # (jax.tree_map was removed from JAX; tree_util is the stable API)
        grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
        # Update parameters
        self.params = [
            (W - self.learning_rate * dW, b - self.learning_rate * db)
            for (W, b), (dW, db) in zip(self.params, grads)
        ]
    # Store final weights
    self.weights = self.params
    return self
def predict(self, X):
    """Predict the target variable.

    Parameters:

        X : array-like, shape = [n_samples, n_features]
            Inputs; standardized with the scaler fitted in `fit`.

    Returns:

        1D array of predictions, shifted back by `self.y_mean_`.

    Raises:

        ValueError
            If the model has no parameters yet.
    """
    # Check fitted state first so the intended error is the one raised,
    # rather than whatever the unfitted scaler throws
    if self.params is None:
        raise ValueError("Model has not been fitted yet.")
    X = self.scaler_.transform(X)
    predictions = predict_internal(
        self.params,
        X,
        activation_func=self.activation_name,
        dropout=self.dropout,
        seed=self.random_state,
    )
    return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
    """
    (Pretrained) Neural Network Classifier.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        weights : list, default=None
            The weights to initialize the model with.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.

    Attributes:

        regr : SimpleMultitaskClassifier
            Wrapper around a `NeuralNetRegressor` built in `fit`
            (None before fitting).
        classes_ : array
            Unique class labels seen in `fit`.
        n_classes_ : int
            Number of unique class labels.
        n_features_in_ : int
            Number of columns of the training inputs.
        n_samples_fit_ : int
            Number of rows of the training inputs.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        predict_proba(X)
            Predict the probability of the target variable.
    """

    # Single definition: the original also declared a property with the same
    # name at the end of the class, which shadowed this attribute.
    _estimator_type = "classifier"

    def __init__(
        self,
        hidden_layer_sizes=(100,),
        max_iter=100,
        learning_rate=0.01,
        weights=None,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0.0,
        random_state=None,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.random_state = random_state
        self.regr = None

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self
        """
        regressor = NeuralNetRegressor(
            hidden_layer_sizes=self.hidden_layer_sizes,
            max_iter=self.max_iter,
            learning_rate=self.learning_rate,
            weights=self.weights,
            l1_ratio=self.l1_ratio,
            alpha=self.alpha,
            activation_name=self.activation_name,
            dropout=self.dropout,
            random_state=self.random_state,
        )
        self.regr = SimpleMultitaskClassifier(regressor)
        self.regr.fit(X, y)
        # Bookkeeping attributes (each assigned once; the original repeated
        # several of these assignments with identical values)
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.n_tasks_ = 1
        self.n_features_in_ = X.shape[1]
        self.n_outputs_ = 1
        self.n_samples_fit_ = X.shape[0]
        self.n_samples_test_ = X.shape[0]
        self.n_features_out_ = 1
        return self

    def predict_proba(self, X):
        """Predict the probability of the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        return self.regr.predict_proba(X)

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        return self.regr.predict(X)
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the model to the data.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self
    """
    regressor = NeuralNetRegressor(
        hidden_layer_sizes=self.hidden_layer_sizes,
        max_iter=self.max_iter,
        learning_rate=self.learning_rate,
        weights=self.weights,
        l1_ratio=self.l1_ratio,
        alpha=self.alpha,
        activation_name=self.activation_name,
        dropout=self.dropout,
        random_state=self.random_state,
    )
    self.regr = SimpleMultitaskClassifier(regressor)
    self.regr.fit(X, y)
    # Bookkeeping attributes (each assigned once; the original repeated
    # several of these assignments with identical values)
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)
    self.n_tasks_ = 1
    self.n_features_in_ = X.shape[1]
    self.n_outputs_ = 1
    self.n_samples_fit_ = X.shape[0]
    self.n_samples_test_ = X.shape[0]
    self.n_features_out_ = 1
    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict_proba(self, X):
    """Return the class probabilities computed by the wrapped model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

    Returns:

        Whatever `self.regr.predict_proba(X)` returns.
    """
    wrapped_model = self.regr
    probabilities = wrapped_model.predict_proba(X)
    return probabilities
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
def predict(self, X):
    """Return the class labels computed by the wrapped model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

    Returns:

        Whatever `self.regr.predict(X)` returns.
    """
    wrapped_model = self.regr
    labels = wrapped_model.predict(X)
    return labels
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            fitted object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals. Default is 95,
            equivalent to a miscoverage error of 5 (%)

        replications: an integer;
            Number of replications for simulated conformal (default is `None`)

        type_pi: a string;
            type of prediction interval. Currently only `None`
            (split conformal without simulation) is implemented;
            any other value raises `NotImplementedError` in `predict`
            when `method` is "splitconformal".

        type_split: a string;
            "random" (random split of data) or "sequential" (sequential split of data)

        kernel, agg:
            stored on the instance; not used by the code of this class.

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi=None,
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None
        self.aic_ = None
        self.aicc_ = None
        self.bic_ = None
        self.sse_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights (accepted but not used here).

        Returns:

            self

        Raises:

            ValueError: if `self.type_split` is not "random" or "sequential".
        """

        # Half of the data trains the model, the other half calibrates it
        if self.type_split == "random":
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )

        elif self.type_split == "sequential":
            n_x = X.shape[0]
            n_x_half = n_x // 2
            first_half_idx = range(0, n_x_half)
            second_half_idx = range(n_x_half, n_x)
            X_train = X[first_half_idx, :]
            X_calibration = X[second_half_idx, :]
            y_train = y[first_half_idx]
            y_calibration = y[second_half_idx]

        else:
            # Previously surfaced later as an unbound-variable error
            raise ValueError(
                "`self.type_split` must be in ('random', 'sequential')"
            )

        if self.method == "splitconformal":
            self.obj.fit(X_train, y_train)
            preds_calibration = self.obj.predict(X_calibration)
            self.calibrated_residuals_ = y_calibration - preds_calibration
            absolute_residuals = np.abs(self.calibrated_residuals_)
            self.calibrated_residuals_scaler_ = StandardScaler(
                with_mean=True, with_std=True
            )
            self.scaled_calibrated_residuals_ = (
                self.calibrated_residuals_scaler_.fit_transform(
                    self.calibrated_residuals_.reshape(-1, 1)
                ).ravel()
            )
            try:
                # numpy version >= 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals, q=self.level / 100, method="higher"
                )
            except TypeError:
                # numpy version < 1.22 does not know the `method` keyword
                self.quantile_ = np.quantile(
                    a=absolute_residuals,
                    q=self.level / 100,
                    interpolation="higher",
                )

        if self.method == "localconformal":
            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(
                self.obj, mad_estimator, AbsErrorErrFunc()
            )
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        # Information criteria computed on the calibration half
        preds = self.obj.predict(X_calibration)

        # Sum of squared errors
        self.sse_ = np.sum((y_calibration - preds) ** 2)

        # Parameter count proxy: hidden features (if the model exposes
        # `n_hidden_features`) plus the number of input columns
        n_params = (
            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
        )

        n_samples = len(y_calibration)
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params

        return self

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned.

        Raises:

            NotImplementedError: when simulations are requested
                (`replications` set, or `type_pi` set with "splitconformal").
        """

        if self.method == "splitconformal":
            pred = self.obj.predict(X)

        if self.method == "localconformal":
            pred = self.icp_.predict(X)

        if self.method == "splitconformal":
            if (
                self.replications is None and self.type_pi is None
            ):  # type_pi is not used here, no bootstrap or kde
                if return_pi:
                    DescribeResult = namedtuple(
                        "DescribeResult", ("mean", "lower", "upper")
                    )
                    return DescribeResult(
                        pred, pred - self.quantile_, pred + self.quantile_
                    )

                else:
                    return pred

            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
                raise NotImplementedError

                # NOTE(review): everything below, up to the end of this
                # branch, is unreachable because of the `raise` above. It is
                # kept unchanged. Before re-enabling it, confirm:
                #   - `raise Warning(...)` aborts instead of warning
                #     (`warnings.warn` was probably intended);
                #   - `self.alpha_ / 200` looks like it should be
                #     `self.alpha_ / 2`, since `alpha_` is already a fraction.
                if self.type_pi is None:
                    self.type_pi = "kde"
                    raise Warning("type_pi must be set, setting to 'kde'")

                if self.replications is None:
                    self.replications = 100
                    raise Warning("replications must be set, setting to 100")

                assert self.type_pi in (
                    "bootstrap",
                    "kde",
                    "normal",
                    "ecdf",
                    "permutation",
                    "smooth-bootstrap",
                ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')"

                if self.type_pi == "bootstrap":
                    np.random.seed(self.seed)
                    self.residuals_sims_ = np.asarray(
                        [
                            np.random.choice(
                                a=self.scaled_calibrated_residuals_,
                                size=X.shape[0],
                            )
                            for _ in range(self.replications)
                        ]
                    ).T
                    self.sims_ = np.asarray(
                        [
                            pred
                            + self.calibrated_residuals_scaler_.scale_[0]
                            * self.residuals_sims_[:, i].ravel()
                            for i in range(self.replications)
                        ]
                    ).T
                elif self.type_pi == "kde":
                    self.kde_ = gaussian_kde(
                        dataset=self.scaled_calibrated_residuals_
                    )
                    self.sims_ = np.asarray(
                        [
                            pred
                            + self.calibrated_residuals_scaler_.scale_[0]
                            * self.kde_.resample(
                                size=X.shape[0], seed=self.seed + i
                            ).ravel()
                            for i in range(self.replications)
                        ]
                    ).T
                else:  # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap"
                    self.residuals_sims_ = np.asarray(
                        simulate_replications(
                            data=self.scaled_calibrated_residuals_,
                            method=self.type_pi,
                            num_replications=self.replications,
                            n_obs=X.shape[0],
                            seed=self.seed,
                        )
                    ).T
                    self.sims_ = np.asarray(
                        [
                            pred
                            + self.calibrated_residuals_scaler_.scale_[0]
                            * self.residuals_sims_[:, i].ravel()
                            for i in range(self.replications)
                        ]
                    ).T

                self.mean_ = np.mean(self.sims_, axis=1)
                self.lower_ = np.quantile(
                    self.sims_, q=self.alpha_ / 200, axis=1
                )
                self.upper_ = np.quantile(
                    self.sims_, q=1 - self.alpha_ / 200, axis=1
                )

                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "sims", "lower", "upper")
                )

                return DescribeResult(
                    self.mean_, self.sims_, self.lower_, self.upper_
                )

        if self.method == "localconformal":
            if self.replications is None:
                if return_pi:
                    # `level` is a percentage; the conformal predictor
                    # expects the miscoverage rate as a fraction in (0, 1)
                    predictions_bounds = self.icp_.predict(
                        X, significance=1 - self.level / 100
                    )
                    DescribeResult = namedtuple(
                        "DescribeResult", ("mean", "lower", "upper")
                    )
                    return DescribeResult(
                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
                    )

                else:
                    return pred

            else:  # (self.method == "localconformal") and if self.replications is not None
                raise NotImplementedError(
                    "When self.method == 'localconformal', there are no simulations"
                )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`)
type_pi: a string;
type of prediction interval. Currently only `None`
(split conformal without simulation) is implemented.
Simulation-based values of type_pi:
- 'bootstrap': Bootstrap resampling.
- 'kde': Kernel Density Estimation.
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit the `method` to training data (X, y).

    Args:

        X: array-like, shape = [n_samples, n_features];
            Training set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples, ]; Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights (accepted but not used here).

    Returns:

        self

    Raises:

        ValueError: if `self.type_split` is not "random" or "sequential".
    """

    # Half of the data trains the model, the other half calibrates it
    if self.type_split == "random":
        X_train, X_calibration, y_train, y_calibration = train_test_split(
            X, y, test_size=0.5, random_state=self.seed
        )

    elif self.type_split == "sequential":
        n_x = X.shape[0]
        n_x_half = n_x // 2
        first_half_idx = range(0, n_x_half)
        second_half_idx = range(n_x_half, n_x)
        X_train = X[first_half_idx, :]
        X_calibration = X[second_half_idx, :]
        y_train = y[first_half_idx]
        y_calibration = y[second_half_idx]

    else:
        # Previously surfaced later as an unbound-variable error
        raise ValueError(
            "`self.type_split` must be in ('random', 'sequential')"
        )

    if self.method == "splitconformal":
        self.obj.fit(X_train, y_train)
        preds_calibration = self.obj.predict(X_calibration)
        self.calibrated_residuals_ = y_calibration - preds_calibration
        absolute_residuals = np.abs(self.calibrated_residuals_)
        self.calibrated_residuals_scaler_ = StandardScaler(
            with_mean=True, with_std=True
        )
        self.scaled_calibrated_residuals_ = (
            self.calibrated_residuals_scaler_.fit_transform(
                self.calibrated_residuals_.reshape(-1, 1)
            ).ravel()
        )
        try:
            # numpy version >= 1.22
            self.quantile_ = np.quantile(
                a=absolute_residuals, q=self.level / 100, method="higher"
            )
        except TypeError:
            # numpy version < 1.22 does not know the `method` keyword
            self.quantile_ = np.quantile(
                a=absolute_residuals,
                q=self.level / 100,
                interpolation="higher",
            )

    if self.method == "localconformal":
        mad_estimator = ExtraTreesRegressor()
        normalizer = RegressorNormalizer(
            self.obj, mad_estimator, AbsErrorErrFunc()
        )
        nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
        self.icp_ = IcpRegressor(nc)
        self.icp_.fit(X_train, y_train)
        self.icp_.calibrate(X_calibration, y_calibration)

    # Information criteria computed on the calibration half
    preds = self.obj.predict(X_calibration)

    # Sum of squared errors
    self.sse_ = np.sum((y_calibration - preds) ** 2)

    # Parameter count proxy: hidden features (if the model exposes
    # `n_hidden_features`) plus the number of input columns
    n_params = (
        getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
    )

    n_samples = len(y_calibration)
    temp = n_samples * np.log(self.sse_ / n_samples)
    self.aic_ = temp + 2 * n_params
    self.bic_ = temp + np.log(n_samples) * n_params

    return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
def predict(self, X, return_pi=False):
    """Obtain predictions and prediction intervals

    Args:

        X: array-like, shape = [n_samples, n_features];
            Testing set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        return_pi: boolean
            Whether the prediction interval is returned or not.
            Default is False, for compatibility with other _estimators_.
            If True, a tuple containing the predictions + lower and upper
            bounds is returned.

    Raises:

        NotImplementedError: when simulations are requested
            (`replications` set, or `type_pi` set with "splitconformal").
    """

    if self.method == "splitconformal":
        pred = self.obj.predict(X)

    if self.method == "localconformal":
        pred = self.icp_.predict(X)

    if self.method == "splitconformal":
        if (
            self.replications is None and self.type_pi is None
        ):  # type_pi is not used here, no bootstrap or kde
            if return_pi:
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "lower", "upper")
                )
                return DescribeResult(
                    pred, pred - self.quantile_, pred + self.quantile_
                )

            else:
                return pred

        else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
            raise NotImplementedError

            # NOTE(review): everything below, up to the end of this
            # branch, is unreachable because of the `raise` above. It is
            # kept unchanged. Before re-enabling it, confirm:
            #   - `raise Warning(...)` aborts instead of warning
            #     (`warnings.warn` was probably intended);
            #   - `self.alpha_ / 200` looks like it should be
            #     `self.alpha_ / 2`, since `alpha_` is already a fraction.
            if self.type_pi is None:
                self.type_pi = "kde"
                raise Warning("type_pi must be set, setting to 'kde'")

            if self.replications is None:
                self.replications = 100
                raise Warning("replications must be set, setting to 100")

            assert self.type_pi in (
                "bootstrap",
                "kde",
                "normal",
                "ecdf",
                "permutation",
                "smooth-bootstrap",
            ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')"

            if self.type_pi == "bootstrap":
                np.random.seed(self.seed)
                self.residuals_sims_ = np.asarray(
                    [
                        np.random.choice(
                            a=self.scaled_calibrated_residuals_,
                            size=X.shape[0],
                        )
                        for _ in range(self.replications)
                    ]
                ).T
                self.sims_ = np.asarray(
                    [
                        pred
                        + self.calibrated_residuals_scaler_.scale_[0]
                        * self.residuals_sims_[:, i].ravel()
                        for i in range(self.replications)
                    ]
                ).T
            elif self.type_pi == "kde":
                self.kde_ = gaussian_kde(
                    dataset=self.scaled_calibrated_residuals_
                )
                self.sims_ = np.asarray(
                    [
                        pred
                        + self.calibrated_residuals_scaler_.scale_[0]
                        * self.kde_.resample(
                            size=X.shape[0], seed=self.seed + i
                        ).ravel()
                        for i in range(self.replications)
                    ]
                ).T
            else:  # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap"
                self.residuals_sims_ = np.asarray(
                    simulate_replications(
                        data=self.scaled_calibrated_residuals_,
                        method=self.type_pi,
                        num_replications=self.replications,
                        n_obs=X.shape[0],
                        seed=self.seed,
                    )
                ).T
                self.sims_ = np.asarray(
                    [
                        pred
                        + self.calibrated_residuals_scaler_.scale_[0]
                        * self.residuals_sims_[:, i].ravel()
                        for i in range(self.replications)
                    ]
                ).T

            self.mean_ = np.mean(self.sims_, axis=1)
            self.lower_ = np.quantile(
                self.sims_, q=self.alpha_ / 200, axis=1
            )
            self.upper_ = np.quantile(
                self.sims_, q=1 - self.alpha_ / 200, axis=1
            )

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )

            return DescribeResult(
                self.mean_, self.sims_, self.lower_, self.upper_
            )

    if self.method == "localconformal":
        if self.replications is None:
            if return_pi:
                # `level` is a percentage; the conformal predictor
                # expects the miscoverage rate as a fraction in (0, 1)
                predictions_bounds = self.icp_.predict(
                    X, significance=1 - self.level / 100
                )
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "lower", "upper")
                )
                return DescribeResult(
                    pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
                )

            else:
                return pred

        else:  # (self.method == "localconformal") and if self.replications is not None
            raise NotImplementedError(
                "When self.method == 'localconformal', there are no simulations"
            )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
class PredictionSet(BaseEstimator, ClassifierMixin):
    """Class PredictionSet: Obtain prediction sets.

    Attributes:

        obj: an object;
            fitted object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction sets.
            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

        level: a float;
            Confidence level for prediction sets, in percent. Default is
            None; a level of 95 is equivalent to a miscoverage error of 5 (%)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="icp",
        level=None,
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.seed = seed
        # Always define alpha_ so `predict` does not fail with an
        # AttributeError when no level was given
        self.alpha_ = None if self.level is None else 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.tcp_ = None

        if self.method == "icp":
            self.icp_ = IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        elif self.method == "tcp":
            self.tcp_ = TcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights (accepted but not used here).

        Returns:

            self
        """
        if self.method == "icp":
            # Half of the data trains the model, the other half calibrates it
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        elif self.method == "tcp":
            self.tcp_.fit(X, y)

        return self

    def predict(self, X, **kwargs):
        """Obtain predictions and prediction sets

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: forwarded to the underlying conformal predictor.

        Raises:

            ValueError: if `self.method` is neither "icp" nor "tcp".
        """
        # Instances created without a level may lack `alpha_`
        significance = getattr(self, "alpha_", None)

        if self.method == "icp":
            return self.icp_.predict(X, significance=significance, **kwargs)

        elif self.method == "tcp":
            return self.tcp_.predict(X, significance=significance, **kwargs)

        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def predict_proba(self, X):
        # NOTE(review): this one-hot encodes the output of `predict` as if it
        # were a vector of integer labels; confirm that this matches what the
        # conformal predictors actually return.
        predictions = self.predict(X)
        return np.eye(len(np.unique(predictions)))[predictions]
Class PredictionSet: Obtain prediction sets.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction sets.
Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
level: a float;
Confidence level for prediction sets, in percent. Default is None;
a level of 95 is equivalent to a miscoverage error of 5 (%)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Train the conformal predictor selected by `self.method`.

    Args:

        X: array-like, shape = [n_samples, n_features];
            Training set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples, ]; Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights (accepted but not used here).

    Returns:

        self
    """
    chosen = self.method

    if chosen == "tcp":
        # Transductive: the predictor is fitted on all the data
        self.tcp_.fit(X, y)
        return self

    if chosen == "icp":
        # Inductive: one half fits the model, the other half calibrates it
        parts = train_test_split(
            X, y, test_size=0.5, random_state=self.seed
        )
        X_fit, X_cal, y_fit, y_cal = parts
        self.icp_.fit(X_fit, y_fit)
        self.icp_.calibrate(X_cal, y_cal)

    return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
def predict(self, X, **kwargs):
    """Return predictions and prediction sets for X.

    Args:

        X: array-like, shape = [n_samples, n_features];
            Testing set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: forwarded to the underlying conformal predictor's
            `predict`.

    Raises:

        ValueError: if `self.method` is neither 'icp' nor 'tcp'.

    """
    # Guard clause first, then pick the predictor matching the method.
    if self.method not in ("icp", "tcp"):
        raise ValueError("`self.method` must be in ('icp', 'tcp')")

    conformal_predictor = self.icp_ if self.method == "icp" else self.tcp_
    return conformal_predictor.predict(
        X, significance=self.alpha_, **kwargs
    )
Obtain predictions and prediction sets
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
class SimpleMultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    One copy of the regression model `obj` is fitted per class, on the
    corresponding column of the one-hot encoded response. The per-class
    regression outputs are squashed with the logistic function and
    normalised row-wise to obtain class probabilities.

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

    Attributes:

        fit_objs_: dict
            fitted copies of `obj`, one per class (keyed by class index)

        n_classes_: int
            number of classes for the classifier

    Examples:

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.SimpleMultitaskClassifier(regr)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    # NOTE: `_estimator_type` is exposed through the property defined at the
    # end of the class; the former class-level assignment was shadowed by
    # that property and has been dropped.

    def __init__(
        self,
        obj,
    ):
        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}
        self.X_scaler_ = StandardScaler()
        self.scaled_X_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit SimpleMultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Optional sample weights, forwarded to `self.obj.fit`.
                If fitting with weights fails, a warning is emitted and
                the models are fitted again without weights.

            **kwargs: additional parameters to be passed to self.obj.fit

        Returns:

            self: object

        """
        import warnings

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.scaled_X_ = self.X_scaler_.fit_transform(X)

        # multitask response: one indicator column per class
        Y = mo.one_hot_encode2(y, self.n_classes_)

        # Start from a clean slate so that a refit with fewer classes does
        # not keep models from a previous fit.
        self.fit_objs_ = {}

        def fit_every_class(**fit_params):
            # `self.obj` is refitted in place for each class; the deep copy
            # freezes the fitted state before the next class overwrites it.
            for i in range(self.n_classes_):
                self.fit_objs_[i] = deepcopy(
                    self.obj.fit(self.scaled_X_, Y[:, i], **fit_params)
                )

        if sample_weight is not None:
            try:
                fit_every_class(sample_weight=sample_weight, **kwargs)
                return self
            except Exception as exc:
                # Best-effort fallback (e.g. the base model does not accept
                # `sample_weight`), but no longer silent.
                warnings.warn(
                    f"fitting with `sample_weight` failed ({exc!r}); "
                    "refitting without sample weights",
                    stacklevel=2,
                )

        fit_every_class(**kwargs)
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters, forwarded to `predict_proba`

        Returns:

            model predictions: {array-like}
                index of the most probable class for each row

        """
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single example.

            **kwargs: additional parameters

        Returns:

            probability estimates for test data: {array-like},
            shape = [n_samples, n_classes] ([1, n_classes] for a
            single 1-D example)

        """
        shape_X = X.shape

        # NOTE(review): `kwargs` is forwarded both to the scaler's
        # `transform` and to the models' `predict`, as in the original
        # code; confirm that this is intended.
        if len(shape_X) == 1:  # one example
            n_features = shape_X[0]

            # A dummy row of ones is stacked under the example so that the
            # models receive a two-row 2-D input; only row 0 is kept
            # (presumably some models need more than one row; TODO confirm).
            new_X = np.vstack(
                (X.reshape(1, n_features), np.ones((1, n_features)))
            )
            Z = self.X_scaler_.transform(new_X, **kwargs)

            # One output row for the single example (previously the array
            # had n_features identical rows).
            probs = np.zeros((1, self.n_classes_))
            for i in range(self.n_classes_):
                probs[0, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

        else:  # multiple rows
            Z = self.X_scaler_.transform(X, **kwargs)

            probs = np.zeros((shape_X[0], self.n_classes_))
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        expit_raw_probs = expit(probs)

        # Floor the row sums to avoid division by zero
        row_sums = expit_raw_probs.sum(axis=1)[:, None]
        row_sums[row_sums < 1e-10] = 1e-10

        return expit_raw_probs / row_sums

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base model doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        # NOTE(review): this path calls `self.obj` (not the per-class
        # `fit_objs_`) and `self.cook_test_set` (not `X_scaler_`);
        # confirm against the base class that this is intended.
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
fitted copies of `obj`, one per class (keyed by class index)
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit SimpleMultitaskClassifier to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Optional sample weights, forwarded to `self.obj.fit`.
            If fitting with weights fails, a warning is emitted and
            the models are fitted again without weights.

        **kwargs: additional parameters to be passed to self.obj.fit

    Returns:

        self: object

    """
    import warnings

    assert mx.is_factor(y), "y must contain only integers"

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    self.scaled_X_ = self.X_scaler_.fit_transform(X)

    # multitask response: one indicator column per class
    Y = mo.one_hot_encode2(y, self.n_classes_)

    # Start from a clean slate so that a refit with fewer classes does not
    # keep models from a previous fit.
    self.fit_objs_ = {}

    def fit_every_class(**fit_params):
        # `self.obj` is refitted in place for each class; the deep copy
        # freezes the fitted state before the next class overwrites it.
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(
                self.obj.fit(self.scaled_X_, Y[:, i], **fit_params)
            )

    if sample_weight is not None:
        try:
            fit_every_class(sample_weight=sample_weight, **kwargs)
            return self
        except Exception as exc:
            # Best-effort fallback (e.g. the base model does not accept
            # `sample_weight`), but no longer silent.
            warnings.warn(
                f"fitting with `sample_weight` failed ({exc!r}); "
                "refitting without sample weights",
                stacklevel=2,
            )

    fit_every_class(**kwargs)
    return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return the predicted class index for each row of X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to classify, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded to `predict_proba`

    Returns:

        model predictions: {array-like}
            column index of the largest probability in each row

    """
    class_probabilities = self.predict_proba(X, **kwargs)
    winners = np.argmax(class_probabilities, axis=1)
    return winners
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    The per-class regression outputs are passed through the logistic
    function and then normalised so that each row sums to one.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single example.

        **kwargs: additional parameters

    Returns:

        probability estimates for test data: {array-like},
        shape = [n_samples, n_classes] ([1, n_classes] for a
        single 1-D example)

    """
    shape_X = X.shape

    # NOTE(review): `kwargs` is forwarded both to the scaler's `transform`
    # and to the models' `predict`, as in the original code; confirm that
    # this is intended.
    if len(shape_X) == 1:  # one example
        n_features = shape_X[0]

        # A dummy row of ones is stacked under the example so that the
        # models receive a two-row 2-D input; only row 0 is kept
        # (presumably some models need more than one row; TODO confirm).
        new_X = np.vstack(
            (X.reshape(1, n_features), np.ones((1, n_features)))
        )
        Z = self.X_scaler_.transform(new_X, **kwargs)

        # One output row for the single example (previously the array had
        # n_features identical rows).
        probs = np.zeros((1, self.n_classes_))
        for i in range(self.n_classes_):
            probs[0, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

    else:  # multiple rows
        Z = self.X_scaler_.transform(X, **kwargs)

        probs = np.zeros((shape_X[0], self.n_classes_))
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

    expit_raw_probs = expit(probs)

    # Floor the row sums to avoid division by zero
    row_sums = expit_raw_probs.sum(axis=1)[:, None]
    row_sums[row_sums < 1e-10] = 1e-10

    return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
class Optimizer:
    """Optimizer class

    Attributes:

        type_optim: str
            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
            or 'scd' (stochastic minibatch coordinate descent)

        num_iters: int
            number of iterations of the optimizer

        learning_rate: float
            step size

        batch_prop: float
            proportion of the initial data used at each optimization step

        learning_method: str
            "poly" - learning rate decreasing as a polynomial function
            of # of iterations
            "exp" - learning rate decreasing as an exponential function
            of # of iterations
            "momentum" - gradient descent using momentum (default)

        randomization: str
            type of randomization applied at each step
            "strat" - stratified subsampling (default)
            "shuffle" - random subsampling

        mass: float
            mass on velocity, for `learning_method` == "momentum"

        decay: float
            coefficient of decrease of the learning rate for
            `learning_method` == "poly" and `learning_method` == "exp"

        tolerance: float
            early stopping parameter (convergence of loss function)

        verbose: int
            controls verbosity of gradient descent
            0 - nothing is printed
            1 - a progress bar is printed
            2 - successive loss function values are printed

    """

    # construct the object -----

    def __init__(
        self,
        type_optim="sgd",
        num_iters=100,
        learning_rate=0.01,
        batch_prop=1.0,
        learning_method="momentum",
        randomization="strat",
        mass=0.9,
        decay=0.1,
        tolerance=1e-3,
        verbose=1,
    ):
        self.type_optim = type_optim
        self.num_iters = num_iters
        self.learning_rate = learning_rate
        self.batch_prop = batch_prop
        self.learning_method = learning_method
        self.randomization = randomization
        self.mass = mass
        self.decay = decay
        self.tolerance = tolerance
        self.verbose = verbose
        self.opt = None

    def fit(self, loss_func, response, x0, q=None, **kwargs):
        """Run the configured optimizer on `loss_func`, starting from `x0`.

        The optimizer's output is stored in `self.results`.

        Args:

            loss_func: loss function

            response: array-like, shape = [n_samples]
                target variable (used for subsampling)

            x0: array-like, shape = [n_features]
                initial value provided to the optimizer

            q: not used by this method; kept for interface compatibility

            **kwargs: additional parameters to be passed to
                    loss function

        Returns:

            self: object

        Raises:

            ValueError: if `self.type_optim` is neither 'scd' nor 'sgd'
                (previously the call returned silently without running
                any optimizer).

        """
        # Validate before touching either solver so a typo fails loudly.
        if self.type_optim not in ("scd", "sgd"):
            raise ValueError("`type_optim` must be in ('scd', 'sgd')")

        # Both solvers take the same keyword arguments.
        solver = scd if self.type_optim == "scd" else sgd

        self.results = solver(
            loss_func,
            response=response,
            x=x0,
            num_iters=self.num_iters,
            batch_prop=self.batch_prop,
            learning_rate=self.learning_rate,
            learning_method=self.learning_method,
            mass=self.mass,
            decay=self.decay,
            randomization=self.randomization,
            tolerance=self.tolerance,
            verbose=self.verbose,
            **kwargs
        )

        return self

    def one_hot_encode(self, y, n_classes):
        """Delegate to the module-level `one_hot_encode(y, n_classes)`."""
        return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum (default)
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `learning_method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`learning_method` == "poly" and `learning_method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
def fit(self, loss_func, response, x0, q=None, **kwargs):
    """Run the configured optimizer on `loss_func`, starting from `x0`.

    The optimizer's output is stored in `self.results`.

    Args:

        loss_func: loss function

        response: array-like, shape = [n_samples]
            target variable (used for subsampling)

        x0: array-like, shape = [n_features]
            initial value provided to the optimizer

        q: not used by this method; kept for interface compatibility

        **kwargs: additional parameters to be passed to
                loss function

    Returns:

        self: object

    Raises:

        ValueError: if `self.type_optim` is neither 'scd' nor 'sgd'
            (previously the call returned silently without running
            any optimizer).

    """
    # Validate before touching either solver so a typo fails loudly.
    if self.type_optim not in ("scd", "sgd"):
        raise ValueError("`type_optim` must be in ('scd', 'sgd')")

    # Both solvers take the same keyword arguments.
    solver = scd if self.type_optim == "scd" else sgd

    self.results = solver(
        loss_func,
        response=response,
        x=x0,
        num_iters=self.num_iters,
        batch_prop=self.batch_prop,
        learning_rate=self.learning_rate,
        learning_method=self.learning_method,
        mass=self.mass,
        decay=self.decay,
        randomization=self.randomization,
        tolerance=self.tolerance,
        verbose=self.verbose,
        **kwargs
    )

    return self
Run the optimizer ('sgd' or 'scd') on the loss function, starting from x0.
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
class QuantileRegressor(BaseEstimator, RegressorMixin):
    """
    Quantile Regressor.

    Three quantiles (lower, median, upper) are built sequentially around
    the base model's predictions: the lowest quantile is the base
    prediction minus an offset, and each following quantile is the
    previous one plus an offset. Each offset is a positive multiplier
    (found by minimising the quantile loss) times a scale that depends
    on `scoring`.

    Parameters:

        obj: base model (regression model)
            The base regressor from which to build a
            quantile regressor.

        level: int, default=95
            The level of the quantiles to compute.

        scoring: str, default="predictions"
            The scoring to use for the optimization and constructing
            prediction intervals (predictions, residuals, conformal,
            studentized, conformal-studentized).

    Attributes:

        obj_ : base model (regression model)
            The base regressor from which to build a
            quantile regressor.

        offset_multipliers_ : list
            The multipliers for the offset.

        scoring_residuals_ : list
            The residuals for the scoring.

        student_multiplier_ : float
            The multiplier for the student.

    """

    def __init__(self, obj, level=95, scoring="predictions"):
        assert scoring in (
            "predictions",
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ), (
            "scoring must be 'predictions', 'residuals', 'conformal', "
            "'studentized' or 'conformal-studentized'"
        )
        self.obj = obj
        # Stored so that get_params()/clone() can read every __init__
        # argument back from the instance.
        self.level = level
        low_risk_level = (1 - level / 100) / 2
        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
        self.scoring = scoring
        self.offset_multipliers_ = None
        self.obj_ = None
        self.scoring_residuals_ = None
        self.student_multiplier_ = None

    def _compute_quantile_loss(self, residuals, quantile):
        """
        Compute the quantile loss for a given set of residuals and quantile.
        """
        return np.mean(
            residuals
            * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0))
        )

    def _offset(self, multiplier, base_predictions, scoring_residuals):
        """
        Return `multiplier` times the scale associated with `self.scoring`.

        The operations are kept in the same order as the formulas they
        replace, so floating-point results are unchanged.
        """
        if self.scoring == "predictions":
            return multiplier * np.abs(base_predictions)
        if self.scoring in ("residuals", "conformal"):
            return (
                multiplier
                * np.std(scoring_residuals)
                / np.sqrt(len(scoring_residuals))
            )
        if self.scoring in ("studentized", "conformal-studentized"):
            return multiplier * self.student_multiplier_
        raise ValueError("Invalid argument 'scoring'")

    def _optimize_multiplier(
        self,
        y,
        base_predictions,
        prev_predictions,
        scoring_residuals=None,
        quantile=0.5,
    ):
        """
        Optimize the multiplier for a given quantile.

        Returns the positive multiplier minimising the quantile loss of
        `y` against the candidate quantile predictions.
        """
        if not 0 < quantile < 1:
            raise ValueError("Quantile should be between 0 and 1.")

        # Same preconditions as before, checked once up front instead of
        # on every evaluation of the objective.
        if self.scoring == "predictions":
            assert (
                base_predictions is not None
            ), "base_predictions must be not None"
        elif self.scoring in (
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ):
            assert (
                scoring_residuals is not None
            ), "scoring_residuals must be not None"
        else:
            raise ValueError("Invalid argument 'scoring'")

        def objective(log_multiplier):
            """
            Objective function for optimization.
            """
            # Convert to positive multiplier using exp
            multiplier = np.exp(log_multiplier[0])
            offset = self._offset(
                multiplier, base_predictions, scoring_residuals
            )
            if prev_predictions is None:
                # For first quantile, subtract from conditional expectation
                predictions = base_predictions - offset
            else:
                # For other quantiles, add to previous quantile
                predictions = prev_predictions + offset
            return self._compute_quantile_loss(y - predictions, quantile)

        # Optimize in log space for numerical stability
        bounds = [(-100, 100)]  # log space bounds
        result = differential_evolution(
            objective,
            bounds,
            popsize=25,
            maxiter=200,
            tol=1e-6,
            disp=False,
        )

        return np.exp(result.x[0])

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self: object
        """
        self.obj_ = clone(self.obj)

        if self.scoring in ("conformal", "conformal-studentized"):
            # Split-conformal variants: residuals come from a model fitted
            # on one half and evaluated on the other (calibration) half.
            X_train, X_calib, y_train, y_calib = train_test_split(
                X, y, test_size=0.5, random_state=42
            )
            self.obj_.fit(X_train, y_train)
            scoring_residuals = y_calib - self.obj_.predict(X_calib)
            target = y_calib
            # The model is then refitted on the calibration half, and the
            # multipliers are optimised against its predictions there.
            self.obj_.fit(X_calib, y_calib)
            base_predictions = self.obj_.predict(X_calib)
        else:
            self.obj_.fit(X, y)
            base_predictions = self.obj_.predict(X)
            scoring_residuals = y - base_predictions
            target = y

        if self.scoring in ("studentized", "conformal-studentized"):
            self.student_multiplier_ = np.std(target, ddof=1) / np.sqrt(
                len(target) - 1
            )
        else:
            self.scoring_residuals_ = scoring_residuals

        # Initialize storage for multipliers
        self.offset_multipliers_ = []
        # Keep track of current predictions for each quantile
        current_predictions = None
        # The "predictions" scoring never passed residuals to the optimiser.
        residuals_arg = (
            None if self.scoring == "predictions" else scoring_residuals
        )

        # Fit each quantile sequentially
        for quantile in self.quantiles:
            multiplier = self._optimize_multiplier(
                y=target,
                base_predictions=base_predictions,
                scoring_residuals=residuals_arg,
                prev_predictions=current_predictions,
                quantile=quantile,
            )
            self.offset_multipliers_.append(multiplier)

            offset = self._offset(
                multiplier, base_predictions, scoring_residuals
            )
            if current_predictions is None:
                # First quantile (lowest)
                current_predictions = base_predictions - offset
            else:
                # Subsequent quantiles
                current_predictions = current_predictions + offset

        return self

    def predict(self, X, return_pi=False):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict for, where n_samples is the number of
                samples and n_features is the number of features.

            return_pi: bool, default=False
                Whether to return the prediction intervals.

        Returns:

            The median quantile predictions when `return_pi` is False;
            otherwise a namedtuple with fields `mean` (base model
            predictions), `lower`, `upper` and `median`.

        Raises:

            ValueError: if the model has not been fitted.
        """
        if self.obj_ is None or self.offset_multipliers_ is None:
            raise ValueError("Model not fitted yet.")

        base_predictions = self.obj_.predict(X)
        all_predictions = []
        current_predictions = None

        # Same sequential construction as in fit: lowest quantile first,
        # then each quantile is the previous one plus its offset.
        for multiplier in self.offset_multipliers_:
            offset = self._offset(
                multiplier, base_predictions, self.scoring_residuals_
            )
            if current_predictions is None:
                current_predictions = base_predictions - offset
            else:
                current_predictions = current_predictions + offset
            all_predictions.append(current_predictions)

        if not return_pi:
            return np.asarray(all_predictions[1])

        DescribeResult = namedtuple(
            "DescribeResult", ["mean", "lower", "upper", "median"]
        )
        # Return an instance; the previous code set attributes on the
        # namedtuple class itself and returned the class.
        return DescribeResult(
            mean=base_predictions,
            lower=np.asarray(all_predictions[0]),
            upper=np.asarray(all_predictions[2]),
            median=np.asarray(all_predictions[1]),
        )
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
def fit(self, X, y):
    """Fit the base model, then one offset multiplier per quantile.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self: object
    """
    mode = self.scoring
    known_modes = (
        "predictions",
        "residuals",
        "conformal",
        "studentized",
        "conformal-studentized",
    )
    self.obj_ = clone(self.obj)

    if mode in ("conformal", "conformal-studentized"):
        # Split variants: residuals are measured on a held-out half, then
        # the model is refit on that same half for the multiplier search.
        X_train, X_calib, y_train, y_calib = train_test_split(
            X, y, test_size=0.5, random_state=42
        )
        self.obj_.fit(X_train, y_train)
        scoring_residuals = y_calib - self.obj_.predict(X_calib)
        if mode == "conformal":
            self.scoring_residuals_ = scoring_residuals
        else:
            self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
                len(y_calib) - 1
            )
        self.obj_.fit(X_calib, y_calib)
        base_predictions = self.obj_.predict(X_calib)
        targets = y_calib
    elif mode in ("predictions", "residuals", "studentized"):
        # Full-data variants: fit and score on the same observations.
        self.obj_.fit(X, y)
        base_predictions = self.obj_.predict(X)
        scoring_residuals = y - base_predictions
        if mode == "studentized":
            self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y) - 1)
        else:
            self.scoring_residuals_ = scoring_residuals
        targets = y

    def spread(multiplier):
        # Size of the step between two consecutive quantile curves.
        if mode == "predictions":
            return multiplier * np.abs(base_predictions)
        if mode in ("residuals", "conformal"):
            return (
                multiplier
                * np.std(scoring_residuals)
                / np.sqrt(len(scoring_residuals))
            )
        return multiplier * self.student_multiplier_

    self.offset_multipliers_ = []
    current_predictions = None

    # Quantiles are fitted sequentially: the first curve sits below the base
    # predictions, every later one is stacked on top of the previous curve.
    for quantile in self.quantiles:
        if mode not in known_modes:
            continue  # unrecognised scoring: nothing is optimised

        search_args = {
            "y": targets,
            "base_predictions": base_predictions,
            "prev_predictions": current_predictions,
            "quantile": quantile,
        }
        if mode != "predictions":
            search_args["scoring_residuals"] = scoring_residuals

        multiplier = self._optimize_multiplier(**search_args)
        self.offset_multipliers_.append(multiplier)

        offset = spread(multiplier)
        if current_predictions is None:
            current_predictions = base_predictions - offset
        else:
            current_predictions = current_predictions + offset

    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict(self, X, return_pi=False):
    """Predict the target variable.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

        return_pi: bool, default=False
            Whether to return the prediction intervals.

    Returns:

        If `return_pi` is falsy, the second quantile curve (the one stored
        as `median`) as an array. Otherwise a namedtuple instance with
        fields `mean` (base model predictions), `lower`, `upper` and
        `median`.

    Raises:

        ValueError: if the model has not been fitted yet.
    """
    # getattr keeps the check meaningful when the attributes were never
    # created, instead of failing earlier with an AttributeError.
    if (
        getattr(self, "obj_", None) is None
        or getattr(self, "offset_multipliers_", None) is None
    ):
        raise ValueError("Model not fitted yet.")

    base_predictions = self.obj_.predict(X)
    multipliers = self.offset_multipliers_

    def spread(multiplier):
        # Step between two consecutive quantile curves; mirrors `fit`.
        if self.scoring == "predictions":
            return multiplier * np.abs(base_predictions)
        if self.scoring in ("residuals", "conformal"):
            return (
                multiplier
                * np.std(self.scoring_residuals_)
                / np.sqrt(len(self.scoring_residuals_))
            )
        return multiplier * self.student_multiplier_

    all_predictions = []
    if self.scoring in (
        "predictions",
        "residuals",
        "conformal",
        "studentized",
        "conformal-studentized",
    ):
        # First quantile sits below the base predictions; each following
        # quantile is stacked on top of the previous one.
        current_predictions = base_predictions - spread(multipliers[0])
        all_predictions.append(current_predictions)
        for multiplier in multipliers[1:]:
            current_predictions = current_predictions + spread(multiplier)
            all_predictions.append(current_predictions)

    if not return_pi:
        return np.asarray(all_predictions[1])

    DescribeResult = namedtuple(
        "DecribeResult", ["mean", "lower", "upper", "median"]
    )
    # Return an instance, not the class with overwritten attributes, so the
    # result unpacks, indexes and prints like a tuple.
    return DescribeResult(
        mean=base_predictions,
        lower=np.asarray(all_predictions[0]),
        upper=np.asarray(all_predictions[2]),
        median=np.asarray(all_predictions[1]),
    )
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
class QuantileClassifier(BaseEstimator, ClassifierMixin):
    """
    Quantile Classifier.

    Parameters:

        obj: base model
            The base model wrapped in a `QuantileRegressor`, itself wrapped
            in a `SimpleMultitaskClassifier`.

        level: int, default=95
            The level of the quantiles to compute.

        scoring: str, default="predictions"
            The scoring to use for the optimization and constructing
            prediction intervals (predictions, residuals, conformal,
            studentized, conformal-studentized).

    Attributes:

        obj_ : SimpleMultitaskClassifier
            The multitask classifier built on top of the quantile regressor;
            `fit`, `predict` and `predict_proba` delegate to it.

    """

    def __init__(self, obj, level=95, scoring="predictions"):
        assert scoring in (
            "predictions",
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ), (
            "scoring must be one of 'predictions', 'residuals', 'conformal', "
            "'studentized' or 'conformal-studentized'"
        )
        self.obj = obj
        # Every constructor argument is stored under its own name so that
        # BaseEstimator.get_params(), repr() and clone() work.
        self.level = level
        self.scoring = scoring
        # NOTE(review): `level` and `scoring` are not forwarded to the
        # QuantileRegressor below, which therefore uses its own defaults.
        # Forwarding them would change fitted models -- confirm intent.
        quantileregressor = QuantileRegressor(self.obj)
        # The multitask wrapper calls predict(X) and needs plain point
        # predictions, so the interval output is switched off for good.
        quantileregressor.predict = partial(
            quantileregressor.predict, return_pi=False
        )
        self.obj_ = SimpleMultitaskClassifier(quantileregressor)

    def fit(self, X, y, **kwargs):
        """Fit the wrapped multitask classifier.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: forwarded to the wrapped classifier's `fit`.

        Returns:

            self: object
        """
        self.obj_.fit(X, y, **kwargs)
        return self

    def predict(self, X, **kwargs):
        """Return the wrapped classifier's predictions for X."""
        return self.obj_.predict(X, **kwargs)

    def predict_proba(self, X, **kwargs):
        """Return the wrapped classifier's probability estimates for X."""
        return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (classification model)
The base classifier from which to build a
quantile classifier.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (classification model)
The base classifier from which to build a
quantile classifier.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
class RandomBagRegressor(RandomBag, RegressorMixin):
    """Randomized 'Bagging' Regression model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method
            predict (obj.predict())

        n_estimators: int
            number of base learners in the ensemble

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley',
            'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped
            out of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in
            model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default
            is one-hot); if `False`, then labels are used, without one-hot
            encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or
            Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering
            respectively (and when relevant).
            Currently available: standardization ('std') or MinMax scaling
            ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified
            bootstrapping

        n_jobs: int or None
            `None` trains sequentially; any other value is handed to
            `Parallel` for threaded training

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            1 shows a progress bar during parallel training; also passed to
            the sequential training loop

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners


    Examples:

    ```python
    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import fetch_california_housing
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.model_selection import train_test_split

    X, y = fetch_california_housing(return_X_y=True, as_frame=False)

    # split data into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2, random_state=13)

    # Requires further tuning
    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                                n_estimators=50,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0, n_clusters=0, verbose=1)

    obj2.fit(X_train, y_train)

    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE

    ```

    """

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # Settings specific to the bagging wrapper itself.
        self.type_fit = "regression"
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Fit Random 'Bagging' model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters; forwarded to each base
                learner's `fit` on the parallel path only

        Returns:

            self: object

        """
        forwarded = (
            "n_hidden_features",
            "activation_name",
            "a",
            "nodes_sim",
            "bias",
            "dropout",
            "direct_link",
            "n_clusters",
            "type_clust",
            "type_scaling",
            "col_sample",
            "row_sample",
            "seed",
        )
        base_learner = CustomRegressor(
            self.obj, **{name: getattr(self, name) for name in forwarded}
        )

        if self.n_jobs is None:
            # Sequential training is delegated to the dedicated loop.
            self.voter_ = rbagloop_regression(
                base_learner, X, y, self.n_estimators, self.verbose, self.seed
            )
        else:
            # Parallel training (flagged as buggy by the original author).
            def fit_estimators(m):
                learner = deepcopy(base_learner)
                learner.set_params(seed=self.seed + m * 1000)
                learner.fit(X, y, **kwargs)
                return learner

            indices = range(self.n_estimators)
            if self.verbose == 1:
                indices = tqdm(indices)

            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(fit_estimators)(m) for m in indices
            )
            self.voter_ = dict(enumerate(voters_list))

        self.n_estimators = len(self.voter_)

        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            weights: indexable by the keys of `voter_`, or None
                `None` averages the base learners; otherwise the weighted
                sum of their predictions is returned and the weights are
                stored in `self.weights`

            **kwargs: accepted but not used by this method

        Returns:

            estimates for test data: {array-like}

        """
        if weights is not None:
            self.weights = weights

        learners = self.voter_
        assert len(learners) > 0, "no estimator found in `RandomBag` ensemble"

        total = 0

        if weights is None:
            for learner in learners.values():
                total += learner.predict(X)
            return total / len(learners)

        for key, learner in learners.items():
            total += weights[key] * learner.predict(X)
        return total
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of base learners in the bagging ensemble
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
    """Fit Random 'Bagging' model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters; forwarded to each base
            learner's `fit` on the parallel path only

    Returns:

        self: object

    """
    forwarded = (
        "n_hidden_features",
        "activation_name",
        "a",
        "nodes_sim",
        "bias",
        "dropout",
        "direct_link",
        "n_clusters",
        "type_clust",
        "type_scaling",
        "col_sample",
        "row_sample",
        "seed",
    )
    base_learner = CustomRegressor(
        self.obj, **{name: getattr(self, name) for name in forwarded}
    )

    if self.n_jobs is None:
        # Sequential training is delegated to the dedicated loop.
        self.voter_ = rbagloop_regression(
            base_learner, X, y, self.n_estimators, self.verbose, self.seed
        )
    else:
        # Parallel training (flagged as buggy by the original author).
        def fit_estimators(m):
            learner = deepcopy(base_learner)
            learner.set_params(seed=self.seed + m * 1000)
            learner.fit(X, y, **kwargs)
            return learner

        indices = range(self.n_estimators)
        if self.verbose == 1:
            indices = tqdm(indices)

        voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(fit_estimators)(m) for m in indices
        )
        self.voter_ = dict(enumerate(voters_list))

    self.n_estimators = len(self.voter_)

    return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, weights=None, **kwargs):
    """Predict for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        weights: indexable by the keys of `voter_`, or None
            `None` averages the base learners; otherwise the weighted
            sum of their predictions is returned and the weights are
            stored in `self.weights`

        **kwargs: accepted but not used by this method

    Returns:

        estimates for test data: {array-like}

    """
    if weights is not None:
        self.weights = weights

    learners = self.voter_
    assert len(learners) > 0, "no estimator found in `RandomBag` ensemble"

    total = 0

    if weights is None:
        # Plain average over every fitted base learner.
        for learner in learners.values():
            total += learner.predict(X)
        return total / len(learners)

    # Weighted sum; the weights are used as given (no normalisation).
    for key, learner in learners.items():
        total += weights[key] * learner.predict(X)
    return total
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
class RandomBagClassifier(RandomBag, ClassifierMixin):
    """Randomized 'Bagging' Classification model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method
            predict (obj.predict())

        n_estimators: int
            number of base learners in the ensemble

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley',
            'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped
            out of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in
            model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default
            is one-hot); if `False`, then labels are used, without one-hot
            encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or
            Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering
            respectively (and when relevant).
            Currently available: standardization ('std') or MinMax scaling
            ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified
            bootstrapping

        n_jobs: int or None
            `None` trains and predicts sequentially; any other value is
            handed to `Parallel` for threaded execution

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            1 shows a progress bar on the parallel paths; also passed to
            the sequential training loop

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners

        classes_: array
            sorted unique values of the training labels

        n_classes_: int
            number of distinct training labels


    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)

    ```python
    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time


    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # decision tree
    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                    direct_link=True,
                                    n_estimators=100,
                                    col_sample=0.9, row_sample=0.9,
                                    dropout=0.3, n_clusters=0, verbose=1)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Fit Random 'Bagging' model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters; forwarded to each base
                learner's `fit` on the parallel path only

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        # Recorded up front so that both training paths expose `classes_`;
        # previously only the parallel path set it.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
        self.n_classes = self.n_classes_

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            cv_calibration=None,
        )

        # 1 - Sequential training -----

        if self.n_jobs is None:
            self.voter_ = rbagloop_classification(
                base_learner, X, y, self.n_estimators, self.verbose, self.seed
            )

            self.n_estimators = len(self.voter_)

            return self

        # 2 - Parallel training (flagged as buggy by the original author) -----

        def fit_estimators(m):
            base_learner__ = deepcopy(base_learner)
            base_learner__.set_params(seed=self.seed + m * 1000)
            base_learner__.fit(X, y, **kwargs)
            return base_learner__

        indices = range(self.n_estimators)
        if self.verbose == 1:
            indices = tqdm(indices)

        voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(fit_estimators)(m) for m in indices
        )

        self.voter_ = dict(enumerate(voters_list))

        self.n_estimators = len(self.voter_)
        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            weights: optional weights, forwarded to `predict_proba`

            **kwargs: forwarded to `predict_proba`

        Returns:

            model predictions: {array-like}; the column index of the
            highest ensemble probability for each sample

        """
        # NOTE(review): this yields column indices, which equal the labels
        # only when the labels are 0..n_classes-1 -- confirm with callers.
        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

    def predict_proba(self, X, weights=None, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            weights: indexable by estimator index, or None
                `None` averages the base learners' probabilities;
                otherwise their weighted sum is returned

            **kwargs: accepted but not used by this method

        Returns:

            probability estimates for test data: {array-like}

        """

        # 1 - Sequential prediction -----

        if self.n_jobs is None:
            if weights is not None:
                self.weights = weights

            voters = self.voter_
            assert len(voters) > 0, "no estimator found in `RandomBag` ensemble"

            ensemble_proba = 0

            if weights is None:
                for elt in voters.values():
                    # Best effort: a learner that fails to predict is
                    # skipped, but the average still divides by the full
                    # ensemble size.
                    try:
                        ensemble_proba += elt.predict_proba(X)
                    except Exception:
                        continue

                return ensemble_proba / len(voters)

            for idx, elt in voters.items():
                ensemble_proba += weights[idx] * elt.predict_proba(X)

            return ensemble_proba

        # 2 - Parallel prediction -----

        def predict_estimator(m):
            # Best effort, as on the sequential path: None marks a failure.
            try:
                return self.voter_[m].predict_proba(X)
            except Exception:
                return None

        indices = range(self.n_estimators)
        if self.verbose == 1:
            indices = tqdm(indices)

        preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(predict_estimator)(m) for m in indices
        )

        ensemble_proba = 0

        if weights is None:
            for proba in preds:
                # Skip failed learners instead of crashing on `0 + None`.
                if proba is not None:
                    ensemble_proba += proba

            return ensemble_proba / self.n_estimators

        for i, proba in enumerate(preds):
            ensemble_proba += weights[i] * proba

        return ensemble_proba

    @property
    def _estimator_type(self):
        return "classifier"
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of base learners in the bagging ensemble
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Random 'Bagging' model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters forwarded to each base learner's
            `fit` when training in parallel (`n_jobs` is not None); the
            sequential training loop does not receive them.

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    # Computed once and set before branching, so that BOTH the sequential
    # and the parallel training paths expose `classes_`.
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
    self.n_classes = self.n_classes_

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
        cv_calibration=None,
    )

    # 1 - Sequential training -----

    if self.n_jobs is None:
        self.voter_ = rbagloop_classification(
            base_learner, X, y, self.n_estimators, self.verbose, self.seed
        )

        # the training loop decides how many learners are actually kept
        self.n_estimators = len(self.voter_)

        return self

    # 2 - Parallel training -----

    def fit_estimators(m):
        # every learner gets its own copy and its own seed
        base_learner__ = deepcopy(base_learner)
        base_learner__.set_params(seed=self.seed + m * 1000)
        base_learner__.fit(X, y, **kwargs)
        return base_learner__

    indices = range(self.n_estimators)
    if self.verbose == 1:
        indices = tqdm(indices)  # progress bar only in verbose mode

    voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
        delayed(fit_estimators)(m) for m in indices
    )

    self.voter_ = dict(enumerate(voters_list))

    self.n_estimators = len(self.voter_)
    return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, weights=None, **kwargs):
    """Predict class indices for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        weights: optional weights, handed unchanged to `predict_proba`

        **kwargs: additional parameters handed unchanged to `predict_proba`

    Returns:

        model predictions: {array-like}
            column index of the largest probability, for each row

    """
    probabilities = self.predict_proba(X, weights, **kwargs)
    return probabilities.argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        weights: optional sequence indexed by the keys of `self.voter_`;
            when given, the result is the weighted sum of the base
            learners' probabilities instead of their plain average.

        **kwargs: accepted for API compatibility; not forwarded to the
            base learners.

    Returns:

        probability estimates for test data: {array-like}

    Raises:

        RuntimeError: if `weights` is None and every base learner fails.

    """
    assert len(self.voter_) > 0, "no estimator found in `RandomBag` ensemble"

    keys = list(self.voter_)

    def predict_estimator(key):
        try:
            return self.voter_[key].predict_proba(X)
        except Exception:
            if weights is not None:
                # a weighted sum is undefined when one of its terms is missing
                raise
            # best effort for the plain average: skip the failing learner
            return None

    if self.n_jobs is None:
        if weights is not None:
            self.weights = weights
        preds = [predict_estimator(key) for key in keys]
    else:
        iterator = tqdm(keys) if self.verbose == 1 else keys
        preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(predict_estimator)(key) for key in iterator
        )

    if weights is not None:
        return sum(weights[key] * pred for key, pred in zip(keys, preds))

    # Average over the learners that actually produced probabilities, so
    # that skipped learners do not shrink the result.
    valid = [pred for pred in preds if pred is not None]
    if not valid:
        raise RuntimeError("all base learners failed in `predict_proba`")

    return sum(valid) / len(valid)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class RandomFourierEstimator(BaseEstimator):
    """Train and query a wrapped estimator on Random Fourier Features.

    Inputs are mapped through an `RBFSampler` (stored as `rff_` once fitted)
    before being handed to the wrapped estimator.
    """

    def __init__(
        self, estimator, n_components=100, gamma=1.0, random_state=None
    ):
        """
        Random Fourier Features transformation with a given estimator.

        Parameters:
        - estimator: A scikit-learn estimator (classifier, regressor, etc.).
        - n_components: Number of random Fourier features.
        - gamma: Hyperparameter for RBF kernel approximation.
        - random_state: Random state for reproducibility.
        """
        self.estimator = estimator
        self.n_components = n_components
        self.gamma = gamma
        self.random_state = random_state

        # Dynamically set the estimator type and graft the matching mixin
        # onto this instance's class (only if it is not already there).
        estimator_type = _get_estimator_type(estimator)
        mixin = {
            "classifier": ClassifierMixin,
            "regressor": RegressorMixin,
        }.get(estimator_type)
        if mixin is not None:
            self._estimator_type = estimator_type
            if not isinstance(self, mixin):
                self.__class__ = type(
                    self.__class__.__name__,
                    (self.__class__, mixin),
                    dict(self.__class__.__dict__),
                )

    def _new_sampler(self):
        """Build an unfitted RBFSampler from the current hyperparameters."""
        return RBFSampler(
            n_components=self.n_components,
            gamma=self.gamma,
            random_state=self.random_state,
        )

    def _require_classifier(self, name):
        """Raise AttributeError unless this wrapper is typed as a classifier."""
        if getattr(self, "_estimator_type", None) != "classifier":
            raise AttributeError(
                f"{name} is not available for this estimator type."
            )

    def _delegate(self, method_name, X, *args, must_support=False):
        """Validate X, map it through `rff_` and call the wrapped estimator."""
        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        if must_support and not hasattr(self.estimator, method_name):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support {method_name}."
            )

        X_transformed = self.rff_.transform(X)
        return getattr(self.estimator, method_name)(X_transformed, *args)

    def fit(self, X, y=None):
        """
        Fit the Random Fourier feature transformer and the estimator.
        """
        X = check_array(X)

        # Initialize and fit the Random Fourier Feature transformer
        self.rff_ = self._new_sampler()
        X_transformed = self.rff_.fit_transform(X)

        # Fit the underlying estimator on the transformed data
        self.estimator.fit(X_transformed, y)

        return self

    def partial_fit(self, X, y, classes=None):
        """
        Incrementally fit the Random Fourier feature transformer and the estimator.

        The transformer is fitted on the first call only; later calls reuse
        it. `classes` is forwarded to the wrapped estimator only when it is
        given, because regressors' `partial_fit` does not accept it.

        Raises ValueError if the wrapped estimator has no `partial_fit`.
        """
        X = check_array(X)

        # Fail before touching `rff_`, so an unsupported estimator does not
        # leave a half-fitted wrapper behind.
        if not hasattr(self.estimator, "partial_fit"):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support partial_fit method."
            )

        if not hasattr(self, "rff_"):
            # First call - fit the transformer
            self.rff_ = self._new_sampler()
            X_transformed = self.rff_.fit_transform(X)
        else:
            # Subsequent calls - only transform
            X_transformed = self.rff_.transform(X)

        if classes is None:
            self.estimator.partial_fit(X_transformed, y)
        else:
            self.estimator.partial_fit(X_transformed, y, classes=classes)

        return self

    def predict(self, X):
        """
        Predict using the Random Fourier transformed data.
        """
        return self._delegate("predict", X)

    def predict_proba(self, X):
        """
        Predict class probabilities (only for classifiers).
        """
        self._require_classifier("predict_proba")
        return self._delegate("predict_proba", X, must_support=True)

    def predict_log_proba(self, X):
        """
        Predict class log probabilities (only for classifiers).
        """
        self._require_classifier("predict_log_proba")
        return self._delegate("predict_log_proba", X, must_support=True)

    def decision_function(self, X):
        """
        Decision function (only for classifiers).
        """
        self._require_classifier("decision_function")
        return self._delegate("decision_function", X, must_support=True)

    def score(self, X, y):
        """
        Evaluate the model performance with the wrapped estimator's `score`.
        """
        return self._delegate("score", X, y)

    @property
    def classes_(self):
        """Classes labels (only for classifiers)."""
        self._require_classifier("classes_")
        return getattr(self.estimator, "classes_", None)

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.

        With `deep=True` the wrapped estimator's parameters are included
        under the `estimator__` prefix.
        """
        params = {}

        # Get estimator parameters with proper prefixing
        if deep:
            estimator_params = self.estimator.get_params(deep=True)
            for key, value in estimator_params.items():
                params[f"estimator__{key}"] = value

        # Add our own parameters
        params.update(
            {
                "estimator": self.estimator,
                "n_components": self.n_components,
                "gamma": self.gamma,
                "random_state": self.random_state,
            }
        )

        return params

    def set_params(self, **params):
        """
        Set the parameters of this estimator.

        Keys prefixed with `estimator__`, and any key that is not one of
        this wrapper's own parameters, are forwarded to the wrapped
        estimator.
        """
        prefix = "estimator__"
        own_names = ("estimator", "n_components", "gamma", "random_state")

        # Separate our parameters from estimator parameters
        our_params = {}
        estimator_params = {}

        for param, value in params.items():
            if param.startswith(prefix):
                estimator_params[param[len(prefix):]] = value
            elif param in own_names:
                our_params[param] = value
            else:
                # Assume it's an estimator parameter without prefix
                estimator_params[param] = value

        # Set our parameters
        for param, value in our_params.items():
            setattr(self, param, value)

        # If estimator changed, update the estimator type
        if "estimator" in our_params:
            self.__init__(
                self.estimator, self.n_components, self.gamma, self.random_state
            )

        # Set estimator parameters
        if estimator_params:
            self.estimator.set_params(**estimator_params)

        # If RFF parameters changed and model is fitted, we need to refit
        rff_changed = any(
            name in our_params
            for name in ("n_components", "gamma", "random_state")
        )
        if hasattr(self, "rff_") and rff_changed:
            # Remove the fitted transformer so it gets recreated on next fit
            delattr(self, "rff_")

        return self
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by `GridSearchCV` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.
Notes
All estimators should specify all the parameters that can be set
at the class level in their __init__ as explicit keyword
arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
... def __init__(self, *, param=1):
... self.param = param
... def fit(self, X, y=None):
... self.is_fitted_ = True
... return self
... def predict(self, X):
... return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y=None):
    """
    Fit the Random Fourier feature map, then the wrapped estimator on the
    mapped data. Returns self.
    """
    features = check_array(X)

    # A fresh sampler is created on every call to fit
    sampler = RBFSampler(
        n_components=self.n_components,
        gamma=self.gamma,
        random_state=self.random_state,
    )
    self.rff_ = sampler
    mapped = sampler.fit_transform(features)

    # The wrapped estimator only ever sees the mapped features
    self.estimator.fit(mapped, y)
    return self
Fit the Random Fourier feature transformer and the estimator.
def predict(self, X):
    """
    Predict with the wrapped estimator on the Random Fourier mapped input.
    """
    check_is_fitted(self, ["rff_"])
    features = check_array(X)

    # Map the input, then delegate to the wrapped estimator
    mapped = self.rff_.transform(features)
    predictions = self.estimator.predict(mapped)
    return predictions
Predict using the Random Fourier transformed data.
def predict_proba(self, X):
    """
    Predict class probabilities (only for classifiers).

    Raises AttributeError when this wrapper is not typed as a classifier,
    and ValueError when the wrapped estimator has no `predict_proba`.
    """
    is_classifier = getattr(self, "_estimator_type", None) == "classifier"
    if not is_classifier:
        raise AttributeError(
            "predict_proba is not available for this estimator type."
        )

    check_is_fitted(self, ["rff_"])
    features = check_array(X)

    wrapped = self.estimator
    if not hasattr(wrapped, "predict_proba"):
        raise ValueError(
            f"The estimator {type(self.estimator).__name__} does not support predict_proba."
        )

    # Map the input, then delegate to the wrapped estimator
    mapped = self.rff_.transform(features)
    return wrapped.predict_proba(mapped)
Predict class probabilities (only for classifiers).
def score(self, X, y):
    """
    Evaluate the model with the wrapped estimator's own `score` method,
    applied to the Random Fourier mapped input.
    """
    check_is_fitted(self, ["rff_"])
    features = check_array(X)

    mapped = self.rff_.transform(features)
    result = self.estimator.score(mapped, y)
    return result
Evaluate the model performance.
class RegressorUpdater(BaseEstimator, RegressorMixin):
    """
    Update a regression model with new observations

    Parameters
    ----------
    regr: object
        A regression model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    def __init__(self, regr, alpha=0.5):
        self.regr = regr
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # Pick up the state of an already fitted model, when there is one
        try:
            self.coef_ = self.regr.coef_
            if isinstance(self.regr, Base):
                self.n_obs_ = self.regr.scaler_.n_samples_seen_
        except AttributeError:
            pass

    def _regr_is_fitted(self):
        """Return True when `check_is_fitted` accepts the wrapped model."""
        # NOTE(review): assumes `check_is_fitted` is scikit-learn's, which
        # returns None on success and raises NotFittedError otherwise; a
        # boolean-returning variant is handled as well.
        from sklearn.exceptions import NotFittedError

        try:
            return check_is_fitted(self.regr) is not False
        except NotFittedError:
            return False

    def fit(self, X, y, **kwargs):
        """Fit the wrapped model if needed and record `n_obs_` / `coef_`.

        `**kwargs` are forwarded to `fit` only for an unfitted
        `CustomRegressor`. Returns self.
        """
        if isinstance(
            self.regr, CustomRegressor
        ):  # nnetsauce model not deep ---
            if self._regr_is_fitted():
                self.n_obs_ = self.regr.scaler_.n_samples_seen_
            else:
                self.regr.fit(X, y, **kwargs)
                self.n_obs_ = X.shape[0]
            if hasattr(self.regr, "coef_"):
                self.coef_ = self.regr.coef_
            return self

        if not hasattr(self.regr, "coef_"):  # unfitted model ---
            self.regr.fit(X, y)  # fitted once (was fitted twice)
            self.n_obs_ = X.shape[0]
            if hasattr(self.regr, "stacked_obj"):
                self.coef_ = self.regr.stacked_obj.coef_
            else:
                self.coef_ = self.regr.coef_
            return self

        # already fitted: only record the state
        self.n_obs_ = X.shape[0]
        self.coef_ = self.regr.coef_
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for X."""
        return self.regr.predict(X)

    def partial_fit(self, X, y):
        """Update the coefficients with ONE new observation (X, y).

        X must be a single row (a 1-D input is reshaped to one row).
        Returns self.
        """
        assert hasattr(
            self.regr, "coef_"
        ), "model must be fitted first (i.e have 'coef_' attribute)"
        assert (
            self.n_obs_ is not None
        ), "model must be fitted first (i.e have 'n_obs_' attribute)"

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        assert X.shape[0] == 1, "X must have one row"

        self.updating_factor_ = self.n_obs_ ** (-self.alpha)

        if isinstance(self.regr, Base) and isinstance(
            self.regr, CustomRegressor
        ):  # nnetsauce CustomRegressor: use the model's own features ---
            newX = self.regr.cook_test_set(X=X)
        else:  # any other model: use the raw features ---
            newX = X

        # np.asarray handles both DataFrame and ndarray inputs, and `newx`
        # is now defined for every kind of model.
        newx = np.asarray(newX).ravel()

        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
            newx, y - np.dot(newx, self.regr.coef_)
        )
        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
        self.coef_ = deepcopy(self.regr.coef_)
        self.n_obs_ += 1
        return self
Update a regression model with new observations
Parameters
regr: object
A regression model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
def fit(self, X, y, **kwargs):
    """Fit the wrapped model if needed and record `n_obs_` / `coef_`.

    `**kwargs` are forwarded to `fit` only for an unfitted
    `CustomRegressor`. Returns self.
    """
    if isinstance(
        self.regr, CustomRegressor
    ):  # nnetsauce model not deep ---
        # NOTE(review): assumes `check_is_fitted` is scikit-learn's, which
        # returns None on success and raises NotFittedError otherwise, so
        # comparing its result to False never detected an unfitted model.
        from sklearn.exceptions import NotFittedError

        try:
            already_fitted = check_is_fitted(self.regr) is not False
        except NotFittedError:
            already_fitted = False

        if already_fitted:
            self.n_obs_ = self.regr.scaler_.n_samples_seen_
        else:
            self.regr.fit(X, y, **kwargs)
            self.n_obs_ = X.shape[0]
        if hasattr(self.regr, "coef_"):
            self.coef_ = self.regr.coef_
        return self

    if not hasattr(self.regr, "coef_"):  # unfitted model ---
        self.regr.fit(X, y)  # fitted once (was fitted twice)
        self.n_obs_ = X.shape[0]
        if hasattr(self.regr, "stacked_obj"):
            self.coef_ = self.regr.stacked_obj.coef_
        else:
            self.coef_ = self.regr.coef_
        return self

    # already fitted: only record the state
    self.n_obs_ = X.shape[0]
    self.coef_ = self.regr.coef_
    return self
class ClassifierUpdater(BaseEstimator, ClassifierMixin):
    """
    Update a classification model with new observations

    `fit`, `predict` and `partial_fit` are not implemented yet: each of
    them raises NotImplementedError.

    Parameters
    ----------
    clf: object
        A classification model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    _estimator_type = "classifier"

    def __init__(self, clf, alpha=0.5):
        self.clf = clf
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # Pick up the state of an already fitted model, when there is one
        try:
            self.coef_ = self.clf.coef_
            if isinstance(self.clf, Base):
                self.n_obs_ = self.clf.scaler_.n_samples_seen_
        except AttributeError:
            pass

    def fit(self, X, y, **kwargs):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "fit method is not implemented for ClassifierUpdater"
        )

    def predict(self, X):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "predict method is not implemented for ClassifierUpdater"
        )

    def partial_fit(self, X, y):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "partial_fit method is not implemented for ClassifierUpdater"
        )
Update a classification model with new observations (not implemented yet: `fit`, `predict` and `partial_fit` raise NotImplementedError)
Parameters
clf: object
A classification model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
def fit(self, X, y, **kwargs):
    """Not implemented; always raises NotImplementedError.

    The statements that used to follow the `raise` could never run and
    have been removed.
    """
    raise NotImplementedError(
        "fit method is not implemented for ClassifierUpdater"
    )
class RidgeRegressor(BaseEstimator, RegressorMixin):
    """Ridge.

    Attributes:

        reg_lambda: float
            regularization parameter.

        backend: str
            type of backend; must be in ('cpu', 'gpu', 'tpu')

    """

    def __init__(self, reg_lambda=0.1, backend="cpu"):
        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"

        sys_platform = platform.system()

        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
            warnings.warn(
                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
            )
            backend = "cpu"

        self.reg_lambda = reg_lambda
        self.backend = backend
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit Ridge regression to training data (X, y)

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: accepted for API compatibility; not used here.

        Returns:

            self: object.

        """
        self.ym, centered_y = mo.center_response(y)
        self.xm = X.mean(axis=0)
        self.xsd = X.std(axis=0)
        self.xsd[self.xsd == 0] = 1  # avoid division by zero
        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
        n_features = X.shape[1]

        if self.backend == "cpu":
            # Ridge as least squares on data augmented with sqrt(lambda) * I.
            # The augmented matrix gets its own name, so that the fallback
            # below still sees the un-augmented standardized X_.
            if len(centered_y.shape) <= 1:
                eye_term = np.sqrt(self.reg_lambda) * np.eye(n_features)
                X_aug = np.vstack((X_, eye_term))
                y_aug = np.concatenate((centered_y, np.zeros(n_features)))
                self.coef_ = get_beta(X_aug, y_aug)[0]
                return self

            try:
                eye_term = np.sqrt(self.reg_lambda) * np.eye(n_features)
                X_aug = np.vstack((X_, eye_term))
                y_aug = np.vstack(
                    (
                        centered_y,
                        np.zeros((eye_term.shape[0], centered_y.shape[1])),
                    )
                )
                self.coef_ = get_beta(X_aug, y_aug)[0]
            except Exception:
                # fallback: explicit (X'X + lambda * I)^{-1} X'y
                x = inv(
                    mo.crossprod(X_) + self.reg_lambda * np.eye(n_features)
                )
                hat_matrix = mo.tcrossprod(x, X_)
                self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
            return self

        # if self.backend in ("gpu", "tpu"):
        x = jinv(
            mo.crossprod(X_, backend=self.backend)
            + self.reg_lambda * jnp.eye(X_.shape[1])
        )

        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
        self.coef_ = mo.safe_sparse_dot(
            hat_matrix, centered_y, backend=self.backend
        )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: accepted for API compatibility; not used here.

        Returns:

            model predictions: {array-like}

        """
        X_ = (X - self.xm[None, :]) / self.xsd[None, :]

        if self.backend == "cpu":
            fitted = mo.safe_sparse_dot(X_, self.coef_)
        else:  # "gpu" or "tpu"
            fitted = mo.safe_sparse_dot(X_, self.coef_, backend=self.backend)

        # scalar response mean (any scalar type) vs. one mean per output
        if np.ndim(self.ym) == 0:
            return self.ym + fitted
        return self.ym[None, :] + fitted
Ridge.
Attributes:
reg_lambda: float
regularization parameter.
backend: str
type of backend; must be in ('cpu', 'gpu', 'tpu')
def fit(self, X, y, **kwargs):
    """Fit the ridge regression model to training data (X, y).

    The predictors are standardized column-wise and the response is
    centered; the statistics used (`xm`, `xsd`, `ym`) are stored on the
    instance so that `predict` can apply the same transformation.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples] or [n_samples, n_outputs]
            Target values.

        **kwargs: accepted for API compatibility; not used by this method.

    Returns:

        self: object.

    """
    self.ym, centered_y = mo.center_response(y)
    self.xm = X.mean(axis=0)
    self.xsd = X.std(axis=0)
    self.xsd[self.xsd == 0] = 1  # avoid division by zero
    X_ = (X - self.xm[None, :]) / self.xsd[None, :]

    if self.backend == "cpu":
        n_features = X.shape[1]
        # Ridge is solved as a least-squares problem on an augmented
        # system: sqrt(lambda) * I is appended below the standardized
        # predictors and matching zeros below the centered response.
        eye_term = np.sqrt(self.reg_lambda) * np.eye(n_features)

        if len(centered_y.shape) <= 1:
            X_aug = np.vstack((X_, eye_term))
            y_aug = np.concatenate((centered_y, np.zeros(n_features)))
            self.coef_ = get_beta(X_aug, y_aug)[0]
            return self

        try:
            # The augmented matrices get their own names: the fallback
            # below needs the *un-augmented* standardized predictors.
            X_aug = np.vstack((X_, eye_term))
            y_aug = np.vstack(
                (
                    centered_y,
                    np.zeros((n_features, centered_y.shape[1])),
                )
            )
            self.coef_ = get_beta(X_aug, y_aug)[0]
        except Exception:
            # Deliberate best-effort fallback: explicit inverse of the
            # penalized cross-product built from the standardized
            # predictors.
            gram_inv = inv(
                mo.crossprod(X_) + self.reg_lambda * np.eye(n_features)
            )
            hat_matrix = mo.tcrossprod(gram_inv, X_)
            self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
        return self

    # Non-CPU backends: explicit inverse of the penalized cross-product.
    x = jinv(
        mo.crossprod(X_, backend=self.backend)
        + self.reg_lambda * jnp.eye(X_.shape[1])
    )

    hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
    self.coef_ = mo.safe_sparse_dot(
        hat_matrix, centered_y, backend=self.backend
    )
    return self
Fit the ridge regression model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional keyword arguments; accepted but not used by this method.
Returns:
self: object.
def predict(self, X, **kwargs):
    """Return predictions for the rows of X.

    X is standardized with the column means and standard deviations
    stored on the instance (`xm`, `xsd`), multiplied by the fitted
    coefficients, and shifted by the stored response mean (`ym`).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: accepted for API compatibility; not used by this method.

    Returns:

        model predictions: {array-like}

    """
    standardized = (X - self.xm[None, :]) / self.xsd[None, :]

    # The CPU call relies on the helper's default backend; every other
    # backend is passed through explicitly.
    if self.backend == "cpu":
        linear_term = mo.safe_sparse_dot(standardized, self.coef_)
    else:
        linear_term = mo.safe_sparse_dot(
            standardized, self.coef_, backend=self.backend
        )

    # A float mean is added as-is; otherwise the mean is given a leading
    # axis so that it broadcasts across rows.
    if isinstance(self.ym, float):
        return self.ym + linear_term
    return self.ym[None, :] + linear_term
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional keyword arguments; accepted but not used by this method.
Returns:
model predictions: {array-like}
class Ridge2Regressor(Ridge2, RegressorMixin):
    """Ridge regression with 2 regularization parameters derived from class Ridge2

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            'cpu' or 'gpu' or 'tpu'

    Attributes:

        beta_: {array-like}
            regression coefficients

        coef_: {array-like}
            alias for `beta_`, regression coefficients

        y_mean_: float
            average response

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        The design matrix returned by `cook_training_set` is split into a
        "direct link" part (original features plus cluster columns),
        penalized by `lambda1`, and a hidden-layer part, penalized by
        `lambda2`. The penalized normal equations are then solved through
        a block-wise inverse.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        sys_platform = platform.system()

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        _, p_X = X.shape
        p_Z = scaled_Z.shape[1]

        # Number of leading "direct link" columns. `cluster_encode` is
        # the constructor parameter, and the same flag `partial_fit`
        # uses, so both methods split the design matrix identically.
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        X_ = scaled_Z[:, 0:n_features]
        Phi_X_ = scaled_Z[:, n_features:p_Z]

        # Blocks of the penalized cross-product matrix.
        # NOTE(review): presumably mo.crossprod(x, y) is x.T @ y and
        # mo.tcrossprod(x, y) is x @ y.T -- confirm against matrixops.
        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
            np.repeat(1, n_features)
        )
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

        # The jax pseudo-inverse is only used for non-CPU backends on
        # Linux/macOS; everything else uses the default `pinv`.
        use_jax_pinv = self.backend != "cpu" and sys_platform in (
            "Linux",
            "Darwin",
        )

        B_inv = jpinv(B) if use_jax_pinv else pinv(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        S_inv = jpinv(S_mat) if use_jax_pinv else pinv(S_mat)

        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)

        # Assemble the full inverse from its four blocks.
        block_inverse = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
                y=-np.transpose(Y),
                backend=self.backend,
            ),
            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        self.beta_ = mo.safe_sparse_dot(
            a=block_inverse,
            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
            backend=self.backend,
        )

        self.coef_ = self.beta_  # sklearn compatibility

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # The single sample is stacked on a row of ones so that a
            # 2-row matrix is handed to cook_test_set; only the first
            # prediction is returned.
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            return (
                self.y_mean_
                + mo.safe_sparse_dot(
                    a=self.cook_test_set(new_X, **kwargs),
                    b=self.beta_,
                    backend=self.backend,
                )
            )[0]

        return self.y_mean_ + mo.safe_sparse_dot(
            a=self.cook_test_set(X, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )

    def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
        """Incrementally fit the Ridge model using SGD-style updates.

        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
        for online learning with individual samples.

        Args:
            X: {array-like}, shape = [n_samples, n_features]
                Training vectors for this batch

            y: array-like, shape = [n_samples]
                Target values for this batch

            learning_rate: float, default=0.01
                Initial learning rate for SGD updates; only read on the
                first call

            decay: float, default=0.001
                Learning rate decay parameter; only read on the first call

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:
            self: object

        Raises:
            ValueError: if X and y do not have the same number of samples
        """

        # Input validation
        X = np.asarray(X)
        y = np.asarray(y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        # Handle first call. getattr keeps this working when nothing has
        # initialised the flag on the instance yet.
        if not getattr(self, "_is_fitted", False):
            self.initial_learning_rate = learning_rate
            self.decay = decay
            self._step_count = 0
            self._is_fitted = True

        # Process the batch
        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n_samples, n_features_total = scaled_Z.shape
        n_original_features = X.shape[1]

        # Number of leading "direct link" columns (penalized by lambda1);
        # the remaining columns are penalized by lambda2.
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_direct_features = n_original_features + self.n_clusters
            else:
                n_direct_features = n_original_features + 1
        else:
            n_direct_features = n_original_features

        # Initialize beta_ if first time
        if not hasattr(self, "beta_") or self.beta_ is None:
            # For regression, beta_ is 1D (single output)
            self.beta_ = np.zeros(n_features_total)

        # Per-coefficient penalty, built once instead of per sample.
        penalties = np.zeros_like(self.beta_)
        penalties[0:n_direct_features] = self.lambda1
        penalties[n_direct_features:n_features_total] = self.lambda2

        single_output = centered_y.ndim == 1

        # Process each sample with SGD
        for i in range(n_samples):
            self._step_count += 1

            # Current learning rate with decay
            current_lr = self.initial_learning_rate / (
                1 + self.decay * self._step_count
            )

            x_i = scaled_Z[i, :]
            y_i = centered_y[i] if single_output else centered_y[i, 0]

            # Residual of the current sample under the current weights
            error = y_i - x_i @ self.beta_

            # beta <- beta + lr * x_i * error - lr * lambda * beta
            self.beta_ += (
                current_lr * x_i * error
                - current_lr * penalties * self.beta_
            )

        self.coef_ = self.beta_  # sklearn compatibility

        return self
Ridge regression with 2 regularization parameters derived from class Ridge2
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
y_mean_: float
average response
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    The design matrix returned by `cook_training_set` is split into a
    "direct link" part (original features plus cluster columns),
    penalized by `lambda1`, and a hidden-layer part, penalized by
    `lambda2`. The penalized normal equations are then solved through
    a block-wise inverse.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set

    Returns:

        self: object

    """

    sys_platform = platform.system()

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    _, p_X = X.shape
    p_Z = scaled_Z.shape[1]

    # Number of leading "direct link" columns. `cluster_encode` is the
    # constructor parameter that controls one-hot encoding of clusters.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]
    Phi_X_ = scaled_Z[:, n_features:p_Z]

    # Blocks of the penalized cross-product matrix.
    # NOTE(review): presumably mo.crossprod(x, y) is x.T @ y and
    # mo.tcrossprod(x, y) is x @ y.T -- confirm against matrixops.
    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
        np.repeat(1, n_features)
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(
        x=Phi_X_, backend=self.backend
    ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

    # The jax pseudo-inverse is only used for non-CPU backends on
    # Linux/macOS; everything else uses the default `pinv`.
    use_jax_pinv = self.backend != "cpu" and sys_platform in (
        "Linux",
        "Darwin",
    )

    B_inv = jpinv(B) if use_jax_pinv else pinv(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    S_inv = jpinv(S_mat) if use_jax_pinv else pinv(S_mat)

    Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)

    # Assemble the full inverse from its four blocks.
    block_inverse = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
            y=-np.transpose(Y),
            backend=self.backend,
        ),
        mo.cbind(x=-Y, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=block_inverse,
        b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
        backend=self.backend,
    )

    self.coef_ = self.beta_  # sklearn compatibility

    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, **kwargs):
    """Return model predictions for X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single sample.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    is_single_sample = len(X.shape) == 1

    if is_single_sample:
        width = X.shape[0]
        # The lone sample is stacked on a row of ones so that a 2-row
        # matrix is handed to cook_test_set; only the first prediction
        # is kept below.
        design_input = mo.rbind(
            x=X.reshape(1, width),
            y=np.ones(width).reshape(1, width),
            backend=self.backend,
        )
    else:
        design_input = X

    cooked = self.cook_test_set(design_input, **kwargs)
    fitted = self.y_mean_ + mo.safe_sparse_dot(
        a=cooked,
        b=self.beta_,
        backend=self.backend,
    )

    if is_single_sample:
        return fitted[0]
    return fitted
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class Ridge2Classifier(Ridge2, ClassifierMixin):
    """Multinomial logit classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        solver: str
            optimization function "L-BFGS-B",  "Newton-CG",
            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
            "trust-ncg-lstsq" (see scipy.optimize.minimize)
            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
            the initial value for the optimization is set to the least squares solution

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        classes_: {array-like}
            unique classes in the target variable

        minloglik_: float
            minimum value of the negative log-likelihood

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from time import time


    breast_cancer = load_breast_cancer()
    X = breast_cancer.data
    y = breast_cancer.target

    # split data into training set and test set
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # create the model with nnetsauce
    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                                lambda2 = 3.17392781e+02,
                                n_hidden_features=95,
                                n_clusters=2,
                                dropout = 3.62817383e-01,
                                type_clust = "gmm")

    # fit the model on training set
    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    # get the accuracy on test set
    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    # get area under the curve on test set (auc)
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
    ```


    """

    # scipy.optimize.minimize methods supported by `fit`; each may be
    # suffixed with "-lstsq" to start from the least squares solution.
    _SOLVER_METHODS = ("L-BFGS-B", "Newton-CG", "trust-ncg")
    _LSTSQ_SUFFIX = "-lstsq"

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        solver="L-BFGS-B",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.solver = solver
        self.beta_ = None
        self.classes_ = None
        self.minloglik_ = None

    def loglik(self, X, Y, **kwargs):
        """Build the objective, gradient and hessian used by `fit`.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            Y: array-like, shape = [n_samples, n_classes]
                One-hot encoded target values.

            **kwargs: forwarded to the internal gradient/hessian helper,
                which accepts and ignores them.

        Returns:

            a tuple of three callables `(loglik_func, grad_func,
            hessian_func)`, each taking the flat parameter vector of
            length n_classes * n_features.

        """

        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
            # nobs, n_classes
            n, K = Y.shape

            # total number of covariates
            p = X.shape[1]

            # initial number of covariates (everything except the
            # trailing hidden-layer columns)
            init_p = p - self.n_hidden_features

            # cap the scores so that np.exp cannot overflow a double
            max_double = 709.0
            XB[XB > max_double] = max_double
            exp_XB = np.exp(XB)
            probs = exp_XB / exp_XB.sum(axis=1)[:, None]

            # gradient -----
            # (Y - p) -> (n, K)
            # X -> (n, p)
            # (K, n) %*% (n, p) -> (K, p)
            if hessian is False:
                grad = (
                    -mo.safe_sparse_dot(
                        a=(Y - probs).T, b=X, backend=self.backend
                    )
                    / n
                )
                # NOTE(review): the penalty contribution below adds a
                # per-class column sum of B to every covariate, which
                # does not look like the derivative of the squared-norm
                # penalty in loglik_func -- confirm intent before
                # changing, since fitted models depend on it.
                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]

                return grad.flatten()

            # hessian -----
            if hessian is True:
                Kp = K * p
                hess = np.zeros((Kp, Kp), float)
                for k1 in range(K):
                    x_index = range(k1 * p, (k1 + 1) * p)
                    for k2 in range(k1, K):
                        y_index = range(k2 * p, (k2 + 1) * p)
                        H_sub = (
                            -mo.safe_sparse_dot(
                                a=X.T,
                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
                                backend=self.backend,
                            )
                            / n
                        )  # do not store
                        # fill both (k1, k2) and (k2, k1) blocks
                        hess[np.ix_(x_index, y_index)] = hess[
                            np.ix_(y_index, x_index)
                        ] = H_sub

                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)

        # total number of covariates
        p = X.shape[1]

        # initial number of covariates
        init_p = p - self.n_hidden_features

        # log-likelihood (1st return)
        def loglik_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            # (n, K)
            XB = mo.safe_sparse_dot(X, B, backend=self.backend)

            # NOTE(review): logsumexp is called without `axis`, so it
            # reduces over every entry of XB rather than per row --
            # confirm whether a per-sample normalizer was intended.
            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()

            res += (
                0.5
                * self.lambda1
                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
            )
            res += (
                0.5
                * self.lambda2
                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
            )

            return res

        # gradient of log-likelihood
        def grad_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            return loglik_grad_hess(
                Y=Y,
                X=X,
                B=B,
                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
                hessian=False,
                **kwargs
            )

        # hessian of log-likelihood
        def hessian_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            return loglik_grad_hess(
                Y=Y,
                X=X,
                B=B,
                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
                hessian=True,
                **kwargs
            )

        return loglik_func, grad_func, hessian_func

    # newton-cg
    # L-BFGS-B
    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
        for K classes and p covariates.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        Raises:

            ValueError: if `solver` is not one of the documented values.

        """

        assert mx.is_factor(y), "y must contain only integers"

        # Exact matching on the solver name: a substring test would make
        # e.g. "Newton-CG" also trigger the "Newton-CG-lstsq" branch.
        warm_start = self.solver.endswith(self._LSTSQ_SUFFIX)
        method = (
            self.solver[: -len(self._LSTSQ_SUFFIX)]
            if warm_start
            else self.solver
        )
        if method not in self._SOLVER_METHODS:
            raise ValueError(
                f"unknown solver {self.solver!r}; expected one of "
                "'L-BFGS-B', 'Newton-CG', 'trust-ncg', 'L-BFGS-B-lstsq', "
                "'Newton-CG-lstsq', 'trust-ncg-lstsq'"
            )

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes = len(self.classes_)
        self.n_classes_ = self.n_classes  # for compatibility with sklearn

        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # optimize for beta, minimize self.loglik (maximize loglik) -----
        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

        if warm_start:
            # start from the least squares solution, flattened class by
            # class to match the (K, p) layout of the parameter vector
            x0 = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
                order="F"
            )
        else:
            x0 = np.zeros(scaled_Z.shape[1] * self.n_classes)

        minimize_args = {
            "fun": loglik_func,
            "x0": x0,
            "jac": grad_func,
            "method": method,
        }
        # only the Newton-type methods are given the hessian
        if method != "L-BFGS-B":
            minimize_args["hess"] = hessian_func

        opt = minimize(**minimize_args)
        self.beta_ = opt.x
        self.minloglik_ = opt.fun

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, the column index of the
            largest predicted probability for each sample
        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like},
            shape = [n_samples, n_classes] (one row for a 1-D input)

        """
        single_sample = len(X.shape) == 1

        if single_sample:
            n_features = X.shape[0]
            # The lone sample is stacked on a row of ones so that a
            # 2-row matrix is handed to cook_test_set; the extra row is
            # dropped again before returning.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        # The column count comes from the cooked matrix itself, so it is
        # right for 1-D input and for any clustering / direct-link
        # configuration.
        ZB = mo.safe_sparse_dot(
            a=Z,
            b=self.beta_.reshape(self.n_classes, Z.shape[1]).T,
            backend=self.backend,
        )

        # Softmax is invariant to a per-row shift; subtracting the row
        # maximum keeps np.exp from overflowing to inf (and NaN ratios).
        exp_ZB = np.exp(ZB - ZB.max(axis=1)[:, None])
        probs = exp_ZB / exp_ZB.sum(axis=1)[:, None]

        return probs[:1] if single_sample else probs

    # Exposed as a property; read on instances it yields "classifier".
    @property
    def _estimator_type(self):
        return "classifier"
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization function "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
lambda2 = 3.17392781e+02,
n_hidden_features=95,
n_clusters=2,
dropout = 3.62817383e-01,
type_clust = "gmm")
# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
    for K classes and p covariates.

    The value of ``self.solver`` is matched *exactly* against the supported
    names ("L-BFGS-B", "Newton-CG", "trust-ncg" and their "-lstsq"
    variants), so exactly one optimization is run per call. Any other
    value is ignored: no optimization is run and ``beta_`` /
    ``minloglik_`` are not set.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    self.n_classes = len(np.unique(y))
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # optimize for beta, minimize self.loglik (maximize loglik) -----
    loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

    # solver name -> (method handed to `minimize`, start from least squares?)
    # A lookup by exact name replaces the former `self.solver in "<name>"`
    # tests, which were substring checks: "Newton-CG" and "trust-ncg" also
    # matched their "-lstsq" variants and were optimized a second time.
    solver_specs = {
        "L-BFGS-B": ("L-BFGS-B", False),
        "Newton-CG": ("Newton-CG", False),
        "trust-ncg": ("trust-ncg", False),
        "L-BFGS-B-lstsq": ("L-BFGS-B", True),
        "Newton-CG-lstsq": ("Newton-CG", True),
        "trust-ncg-lstsq": ("trust-ncg", True),
    }

    spec = solver_specs.get(self.solver)

    if spec is not None:
        method, start_from_lstsq = spec

        if start_from_lstsq:
            # least squares solution, flattened column by column
            x0 = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
                order="F"
            )
        else:
            x0 = np.zeros(scaled_Z.shape[1] * self.n_classes)

        minimize_args = {
            "fun": loglik_func,
            "x0": x0,
            "jac": grad_func,
            "method": method,
        }
        # only the Newton-type methods are given the Hessian
        if method in ("Newton-CG", "trust-ncg"):
            minimize_args["hess"] = hessian_func

        opt = minimize(**minimize_args)
        self.beta_ = opt.x
        self.minloglik_ = opt.fun

    self.classes_ = np.unique(y)

    return self
Fit Ridge model to training data (X, y).
for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by `predict_proba`
    """

    class_probabilities = self.predict_proba(X, **kwargs)

    # row-wise position of the largest probability
    return np.argmax(class_probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation: it is
            stacked on top of a row of ones before being processed.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            NOTE(review): for a 1-D `X` the padding row of ones is not
            stripped, so the result presumably has two rows (the first
            one for `X`) -- confirm against `cook_test_set`.

    """
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        Z = self.cook_test_set(new_X, **kwargs)

    else:
        Z = self.cook_test_set(X, **kwargs)

    # beta_ is stored flat (one block of coefficients per class); its
    # per-class length is taken from the cooked matrix Z rather than
    # recomputed from X, because `X.shape[1]` does not exist for 1-D
    # input and the width of Z is what the product below requires.
    ZB = mo.safe_sparse_dot(
        a=Z,
        b=self.beta_.reshape(self.n_classes, Z.shape[1]).T,
        backend=self.backend,
    )

    # softmax over the classes
    exp_ZB = np.exp(ZB)

    return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
    """Multitask Ridge classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        coef_: {array-like}
            alias for `beta_`, regression coefficients

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                    dropout=4.31054687e-01,
                                    n_clusters=int(1.71484375e+00),
                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    # NOTE: shadowed by the `_estimator_type` property defined further down.
    _estimator_type = "classifier"

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        The cooked design matrix is split column-wise into a "direct"
        part (original features plus cluster columns, penalized with
        `lambda1`) and a "hidden" part (remaining columns, penalized with
        `lambda2`). The coefficients are then obtained in closed form
        from the blockwise (pseudo-)inverse of the penalized Gram matrix.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        sys_platform = platform.system()

        assert mx.is_factor(y), "y must contain only integers"

        classes = np.unique(y)
        self.classes_ = classes  # for compatibility with sklearn
        self.n_classes_ = len(classes)  # for compatibility with sklearn

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        p_X = X.shape[1]
        p_Z = scaled_Z.shape[1]

        self.n_classes = len(classes)

        # multitask response
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # Number of leading columns of scaled_Z regularized by lambda1.
        # The constructor argument is `cluster_encode` (also what
        # partial_fit reads); the former `self.encode_clusters == True`
        # did not test that flag.
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        X_ = scaled_Z[:, 0:n_features]
        Phi_X_ = scaled_Z[:, n_features:p_Z]

        # the jax pseudo-inverse is only used off-cpu on Linux / macOS
        use_jax_pinv = self.backend != "cpu" and sys_platform in (
            "Linux",
            "Darwin",
        )

        def _pseudo_inverse(mat):
            return jpinv(mat) if use_jax_pinv else pinv(mat)

        # Penalized Gram blocks (assuming mo.crossprod(x, y) is x.T @ y and
        # mo.tcrossprod(x, y) is x @ y.T -- TODO confirm in `mo`)
        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
            np.repeat(1, X_.shape[1])
        )
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

        B_inv = _pseudo_inverse(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        S_inv = _pseudo_inverse(S_mat)

        # assemble the inverse of the full penalized Gram matrix block by block
        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
        inv = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
                y=-np.transpose(Y2),
                backend=self.backend,
            ),
            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        self.beta_ = mo.safe_sparse_dot(
            a=inv,
            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
            backend=self.backend,
        )
        self.coef_ = self.beta_  # sklearn compatibility
        # set again after fitting, as the original code did
        # (TODO confirm whether cook_training_set can modify classes_)
        self.classes_ = classes
        self._is_fitted = True
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                for each row, the column index of the largest value
                returned by `predict_proba`

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation: it is
                stacked on top of a row of ones before being processed.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)

        # softmax over the classes
        exp_ZB = np.exp(ZB)

        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy). One of "accuracy",
                "f1", "precision", "recall", "roc_auc", "log_loss",
                "balanced_accuracy", "average_precision",
                "neg_brier_score", "neg_log_loss".

        Returns:

            score: float
                `None` when `scoring` is not one of the names above.
        """

        if scoring is None:
            scoring = "accuracy"

        # metrics evaluated on the output of `predict`
        label_metrics = {
            "accuracy": "accuracy_score",
            "f1": "f1_score",
            "precision": "precision_score",
            "recall": "recall_score",
            "roc_auc": "roc_auc_score",
            "balanced_accuracy": "balanced_accuracy_score",
            "average_precision": "average_precision_score",
        }
        # metrics evaluated on the output of `predict_proba`:
        # name -> (function in skm2, negate the result?)
        proba_metrics = {
            "log_loss": ("log_loss", False),
            "neg_brier_score": ("brier_score_loss", True),
            "neg_log_loss": ("log_loss", True),
        }

        if scoring in label_metrics:
            metric = getattr(skm2, label_metrics[scoring])
            return metric(y, self.predict(X))

        if scoring in proba_metrics:
            metric_name, negate = proba_metrics[scoring]
            value = getattr(skm2, metric_name)(y, self.predict_proba(X))
            return -value if negate else value

        return None

    @property
    def _estimator_type(self):
        return "classifier"

    def partial_fit(
        self, X, y, classes=None, learning_rate=0.01, decay=0.001, **kwargs
    ):
        """Incrementally fit the Ridge model using SGD-style updates.

        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
        for online learning with individual samples.

        `learning_rate` and `decay` are only recorded the first time the
        SGD state is initialized (first call on an unfitted model, or
        first call after `fit`); later calls keep the recorded values.

        Args:
            X: {array-like}, shape = [n_samples, n_features]
                Training vectors for this batch

            y: array-like, shape = [n_samples]
                Target values for this batch

            classes: array-like, shape = [n_classes], optional
                List of all possible target classes. Must be provided on first call
                to partial_fit if not already fitted.

            learning_rate: float, default=0.01
                Initial learning rate for SGD updates

            decay: float, default=0.001
                Learning rate decay parameter

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:
            self: object

        Raises:
            ValueError: if X and y have a different number of samples, or
                if y contains classes unseen on an already fitted model.
        """
        # Input validation
        X = np.asarray(X)
        y = np.asarray(y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        assert mx.is_factor(y), "y must contain only integers"

        # `_is_fitted` is only set by fit / partial_fit, so it may not
        # exist yet on a freshly constructed estimator.
        if not getattr(self, "_is_fitted", False):
            # Handle classes on first call
            if classes is not None:
                self.classes_ = np.array(classes)
            else:
                self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            self.n_classes = len(self.classes_)
            self._is_fitted = True

        else:
            # Check for new classes
            new_classes = np.setdiff1d(y, self.classes_)
            if len(new_classes) > 0:
                raise ValueError(
                    f"New classes {new_classes} encountered. "
                    "partial_fit cannot handle new classes after first call."
                )

        # Initialize learning parameters. Also needed when the model was
        # fitted with `fit`, which does not create the SGD state.
        if not hasattr(self, "_step_count"):
            self.initial_learning_rate = learning_rate
            self.decay = decay
            self._step_count = 0

        # Process the batch
        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # Get dimensions
        n_samples, n_features_total = scaled_Z.shape
        n_original_features = X.shape[1]

        # Create one-hot encoded targets
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # Determine feature dimensions for regularization
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_direct_features = n_original_features + self.n_clusters
            else:
                n_direct_features = n_original_features + 1
        else:
            n_direct_features = n_original_features

        # Initialize beta_ if first time
        if getattr(self, "beta_", None) is None:
            self.beta_ = np.zeros((n_features_total, self.n_classes))

        # Precompute indices for regularization
        direct_indices = slice(0, n_direct_features)
        hidden_indices = slice(n_direct_features, n_features_total)

        # Process each sample with SGD
        for i in range(n_samples):
            self._step_count += 1

            # Current learning rate with decay
            current_lr = self.initial_learning_rate / (
                1 + self.decay * self._step_count
            )

            # Current sample and target
            x_i = scaled_Z[i, :]  # Feature vector
            y_i = Y[i, :]  # Target vector (one-hot)

            # Error: y_i - x_i^T * beta
            error = y_i - x_i @ self.beta_

            # Gradient update: current_lr * x_i * error
            gradient_update = current_lr * np.outer(x_i, error)

            # Regularization terms: lambda1 on the direct-link rows,
            # lambda2 on the hidden-layer rows
            reg_update = np.zeros_like(self.beta_)
            reg_update[direct_indices, :] = (
                current_lr * self.lambda1 * self.beta_[direct_indices, :]
            )
            reg_update[hidden_indices, :] = (
                current_lr * self.lambda2 * self.beta_[hidden_indices, :]
            )

            # Combined update: beta = beta + gradient_update - reg_update
            self.beta_ += gradient_update - reg_update

        self.coef_ = self.beta_  # sklearn compatibility

        return self
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
dropout=4.31054687e-01,
n_clusters=int(1.71484375e+00),
lambda1=1.24023438e+01, lambda2=7.30263672e+03)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    The cooked design matrix is split column-wise into a "direct" part
    (original features plus cluster columns, penalized with `lambda1`)
    and a "hidden" part (remaining columns, penalized with `lambda2`).
    The coefficients are then obtained in closed form from the blockwise
    (pseudo-)inverse of the penalized Gram matrix.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    sys_platform = platform.system()

    assert mx.is_factor(y), "y must contain only integers"

    classes = np.unique(y)
    self.classes_ = classes  # for compatibility with sklearn
    self.n_classes_ = len(classes)  # for compatibility with sklearn

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    p_X = X.shape[1]
    p_Z = scaled_Z.shape[1]

    self.n_classes = len(classes)

    # multitask response
    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # Number of leading columns of scaled_Z regularized by lambda1.
    # The estimator's constructor argument is `cluster_encode`; the
    # former `self.encode_clusters == True` did not test that flag.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]
    Phi_X_ = scaled_Z[:, n_features:p_Z]

    # the jax pseudo-inverse is only used off-cpu on Linux / macOS
    use_jax_pinv = self.backend != "cpu" and sys_platform in (
        "Linux",
        "Darwin",
    )

    def _pseudo_inverse(mat):
        return jpinv(mat) if use_jax_pinv else pinv(mat)

    # Penalized Gram blocks (assuming mo.crossprod(x, y) is x.T @ y and
    # mo.tcrossprod(x, y) is x @ y.T -- TODO confirm in `mo`)
    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
        np.repeat(1, X_.shape[1])
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(
        x=Phi_X_, backend=self.backend
    ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

    B_inv = _pseudo_inverse(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    S_inv = _pseudo_inverse(S_mat)

    # assemble the inverse of the full penalized Gram matrix block by block
    Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
    inv = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
            y=-np.transpose(Y2),
            backend=self.backend,
        ),
        mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
        backend=self.backend,
    )
    self.coef_ = self.beta_  # sklearn compatibility
    # set again after fitting, as the original code did
    # (TODO confirm whether cook_training_set can modify classes_)
    self.classes_ = classes
    self._is_fitted = True
    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by `predict_proba`

    """

    class_probabilities = self.predict_proba(X, **kwargs)

    # row-wise position of the largest probability
    return np.argmax(class_probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation: it is
            stacked on top of a row of ones before being processed.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """

    single_observation = len(X.shape) == 1

    if single_observation:
        width = X.shape[0]
        observation_row = X.reshape(1, width)
        ones_row = np.ones(width).reshape(1, width)
        design = mo.rbind(
            x=observation_row,
            y=ones_row,
            backend=self.backend,
        )
    else:
        design = X

    Z = self.cook_test_set(design, **kwargs)

    linear_scores = mo.safe_sparse_dot(
        a=Z, b=self.beta_, backend=self.backend
    )

    # softmax over the classes
    exp_scores = np.exp(linear_scores)
    row_totals = exp_scores.sum(axis=1)

    return exp_scores / row_totals[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy). One of "accuracy",
            "f1", "precision", "recall", "roc_auc", "log_loss",
            "balanced_accuracy", "average_precision",
            "neg_brier_score", "neg_log_loss".

    Returns:

        score: float
            `None` when `scoring` is not one of the names above.
    """

    metric_key = "accuracy" if scoring is None else scoring

    # metrics evaluated on the output of `predict`
    label_metrics = {
        "accuracy": "accuracy_score",
        "f1": "f1_score",
        "precision": "precision_score",
        "recall": "recall_score",
        "roc_auc": "roc_auc_score",
        "balanced_accuracy": "balanced_accuracy_score",
        "average_precision": "average_precision_score",
    }
    # metrics evaluated on the output of `predict_proba`:
    # name -> (function in skm2, negate the result?)
    proba_metrics = {
        "log_loss": ("log_loss", False),
        "neg_brier_score": ("brier_score_loss", True),
        "neg_log_loss": ("log_loss", True),
    }

    if metric_key in label_metrics:
        metric = getattr(skm2, label_metrics[metric_key])
        return metric(y, self.predict(X))

    if metric_key in proba_metrics:
        metric_name, negate = proba_metrics[metric_key]
        value = getattr(skm2, metric_name)(y, self.predict_proba(X))
        return -value if negate else value

    return None
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class SubSampler:
    """Subsampling class.

    Attributes:

       y: array-like, shape = [n_samples]
           Target values.

       row_sample: double
           subsampling fraction

       n_samples: int
            subsampling by using the number of rows (supersedes row_sample)

       seed: int
           reproducibility seed

       n_jobs: int
            number of jobs to run in parallel

       verbose: bool
            print progress messages and bars
    """

    def __init__(
        self,
        y,
        row_sample=0.8,
        n_samples=None,
        seed=123,
        n_jobs=None,
        verbose=False,
    ):
        self.y = y
        self.n_samples = n_samples

        if n_samples is not None:
            # an explicit row count takes precedence over the fraction
            assert n_samples < len(y), "'n_samples' must be < len(y)"
            self.row_sample = n_samples / len(y)
        else:
            assert (
                0 <= row_sample < 1
            ), "'row_sample' must be provided, plus < 1 and >= 0"
            self.row_sample = row_sample

        self.seed = seed
        self.indices = None  # filled in by `subsample`
        self.n_jobs = n_jobs
        self.verbose = verbose

    def subsample(self):
        """Returns indices of subsampled input data.

        The result of `dosubsample` is also stored in `self.indices`.

        Examples:

        <ul>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
        </ul>

        """
        sampled = dosubsample(
            y=self.y,
            row_sample=self.row_sample,
            seed=self.seed,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        )
        self.indices = sampled
        return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int subsampling by using the number of rows (supersedes row_sample)
seed: int reproducibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
def subsample(self):
    """Returns indices of subsampled input data.

    The result of `dosubsample` is also stored in `self.indices`.

    Examples:

    <ul>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
    </ul>

    """
    sampled = dosubsample(
        y=self.y,
        row_sample=self.row_sample,
        seed=self.seed,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
    )
    self.indices = sampled
    return self.indices
Returns indices of subsampled input data.
Examples: